{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>median_house_value</th>\n",
       "      <th>ocean_proximity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-122.23</td>\n",
       "      <td>37.88</td>\n",
       "      <td>41.0</td>\n",
       "      <td>880.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>322.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>8.3252</td>\n",
       "      <td>452600.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-122.22</td>\n",
       "      <td>37.86</td>\n",
       "      <td>21.0</td>\n",
       "      <td>7099.0</td>\n",
       "      <td>1106.0</td>\n",
       "      <td>2401.0</td>\n",
       "      <td>1138.0</td>\n",
       "      <td>8.3014</td>\n",
       "      <td>358500.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-122.24</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1467.0</td>\n",
       "      <td>190.0</td>\n",
       "      <td>496.0</td>\n",
       "      <td>177.0</td>\n",
       "      <td>7.2574</td>\n",
       "      <td>352100.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-122.25</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1274.0</td>\n",
       "      <td>235.0</td>\n",
       "      <td>558.0</td>\n",
       "      <td>219.0</td>\n",
       "      <td>5.6431</td>\n",
       "      <td>341300.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-122.25</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1627.0</td>\n",
       "      <td>280.0</td>\n",
       "      <td>565.0</td>\n",
       "      <td>259.0</td>\n",
       "      <td>3.8462</td>\n",
       "      <td>342200.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \\\n",
       "0    -122.23     37.88                41.0        880.0           129.0   \n",
       "1    -122.22     37.86                21.0       7099.0          1106.0   \n",
       "2    -122.24     37.85                52.0       1467.0           190.0   \n",
       "3    -122.25     37.85                52.0       1274.0           235.0   \n",
       "4    -122.25     37.85                52.0       1627.0           280.0   \n",
       "\n",
       "   population  households  median_income  median_house_value ocean_proximity  \n",
       "0       322.0       126.0         8.3252            452600.0        NEAR BAY  \n",
       "1      2401.0      1138.0         8.3014            358500.0        NEAR BAY  \n",
       "2       496.0       177.0         7.2574            352100.0        NEAR BAY  \n",
       "3       558.0       219.0         5.6431            341300.0        NEAR BAY  \n",
       "4       565.0       259.0         3.8462            342200.0        NEAR BAY  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#特征工程\n",
    "#获取数据\n",
    "import os\n",
    "import pandas as pd\n",
    "def load_housing_data(housing_path='./'):\n",
    "    csv_path=os.path.join(housing_path,'housing.csv')\n",
    "    return pd.read_csv(csv_path)\n",
    "housing=load_housing_data()\n",
    "housing.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "创建测试集和训练集\n",
    "测试集：训练集=1：4\n",
    "'''\n",
    "import numpy as np\n",
    "def split_train_test(data,test_radio):\n",
    "    np.random.seed(42)\n",
    "    #对原来的数组进行重新洗牌，随机打乱原来的元素顺序\n",
    "    indices=np.random.permutation(len(data))\n",
    "    #获得测试集的大小\n",
    "    test_set_size=int(len(data)*test_radio)\n",
    "    #对数据进行随机切片\n",
    "    test_indices=indices[:test_set_size]\n",
    "    train_indices=indices[test_set_size:]\n",
    "    return data.iloc[test_indices],data.iloc[train_indices]\n",
    "test_set,train_set=split_train_test(housing,0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#对样本设置唯一的标识符，对标识符去hash值，去hash的最后一个字节，值《=51，256*20%。放入测试机\n",
    "#hash加密\n",
    "#hash值相同，对象不一定相同\n",
    "import hashlib\n",
    "\n",
    "def test_set_check(identifier, test_radio, hash = hashlib.md5):\n",
    "    return hash(np.int64(identifier)).digest()[-1] < 256 * test_radio  # 返回摘要，作为二进制数据字符串\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_train_test_by_id(data, test_ratio, id_column):\n",
    "    ids = data[id_column]\n",
    "    in_test_set = ids.apply(lambda id_:test_set_check(id_, test_ratio))\n",
    "    return data.loc[~in_test_set], data.loc[in_test_set]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>median_house_value</th>\n",
       "      <th>ocean_proximity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>-122.23</td>\n",
       "      <td>37.88</td>\n",
       "      <td>41.0</td>\n",
       "      <td>880.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>322.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>8.3252</td>\n",
       "      <td>452600.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>-122.22</td>\n",
       "      <td>37.86</td>\n",
       "      <td>21.0</td>\n",
       "      <td>7099.0</td>\n",
       "      <td>1106.0</td>\n",
       "      <td>2401.0</td>\n",
       "      <td>1138.0</td>\n",
       "      <td>8.3014</td>\n",
       "      <td>358500.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>-122.24</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1467.0</td>\n",
       "      <td>190.0</td>\n",
       "      <td>496.0</td>\n",
       "      <td>177.0</td>\n",
       "      <td>7.2574</td>\n",
       "      <td>352100.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>-122.25</td>\n",
       "      <td>37.85</td>\n",
       "      <td>52.0</td>\n",
       "      <td>1274.0</td>\n",
       "      <td>235.0</td>\n",
       "      <td>558.0</td>\n",
       "      <td>219.0</td>\n",
       "      <td>5.6431</td>\n",
       "      <td>341300.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>-122.25</td>\n",
       "      <td>37.84</td>\n",
       "      <td>52.0</td>\n",
       "      <td>2535.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>1094.0</td>\n",
       "      <td>514.0</td>\n",
       "      <td>3.6591</td>\n",
       "      <td>299200.0</td>\n",
       "      <td>NEAR BAY</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   index  longitude  latitude  housing_median_age  total_rooms  \\\n",
       "0      0    -122.23     37.88                41.0        880.0   \n",
       "1      1    -122.22     37.86                21.0       7099.0   \n",
       "2      2    -122.24     37.85                52.0       1467.0   \n",
       "3      3    -122.25     37.85                52.0       1274.0   \n",
       "6      6    -122.25     37.84                52.0       2535.0   \n",
       "\n",
       "   total_bedrooms  population  households  median_income  median_house_value  \\\n",
       "0           129.0       322.0       126.0         8.3252            452600.0   \n",
       "1          1106.0      2401.0      1138.0         8.3014            358500.0   \n",
       "2           190.0       496.0       177.0         7.2574            352100.0   \n",
       "3           235.0       558.0       219.0         5.6431            341300.0   \n",
       "6           489.0      1094.0       514.0         3.6591            299200.0   \n",
       "\n",
       "  ocean_proximity  \n",
       "0        NEAR BAY  \n",
       "1        NEAR BAY  \n",
       "2        NEAR BAY  \n",
       "3        NEAR BAY  \n",
       "6        NEAR BAY  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_with_id=housing.reset_index()#使用行索引作为id\n",
    "train_set, test_set = split_train_test_by_id(housing_with_id, 0.2, 'index')\n",
    "train_set.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#基于行索引，只能末尾插入，否则行索引会变\n",
    "#寻找稳定特征来创建唯一标识符\n",
    "housing_with_id['id']=housing['longitude']*1000+housing['latitude']\n",
    "train_set, test_set = split_train_test_by_id(housing_with_id, 0.2, 'id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>median_house_value</th>\n",
       "      <th>ocean_proximity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14196</th>\n",
       "      <td>-117.03</td>\n",
       "      <td>32.71</td>\n",
       "      <td>33.0</td>\n",
       "      <td>3126.0</td>\n",
       "      <td>627.0</td>\n",
       "      <td>2300.0</td>\n",
       "      <td>623.0</td>\n",
       "      <td>3.2596</td>\n",
       "      <td>103000.0</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8267</th>\n",
       "      <td>-118.16</td>\n",
       "      <td>33.77</td>\n",
       "      <td>49.0</td>\n",
       "      <td>3382.0</td>\n",
       "      <td>787.0</td>\n",
       "      <td>1314.0</td>\n",
       "      <td>756.0</td>\n",
       "      <td>3.8125</td>\n",
       "      <td>382100.0</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17445</th>\n",
       "      <td>-120.48</td>\n",
       "      <td>34.66</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1897.0</td>\n",
       "      <td>331.0</td>\n",
       "      <td>915.0</td>\n",
       "      <td>336.0</td>\n",
       "      <td>4.1563</td>\n",
       "      <td>172600.0</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14265</th>\n",
       "      <td>-117.11</td>\n",
       "      <td>32.69</td>\n",
       "      <td>36.0</td>\n",
       "      <td>1421.0</td>\n",
       "      <td>367.0</td>\n",
       "      <td>1418.0</td>\n",
       "      <td>355.0</td>\n",
       "      <td>1.9425</td>\n",
       "      <td>93400.0</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2271</th>\n",
       "      <td>-119.80</td>\n",
       "      <td>36.78</td>\n",
       "      <td>43.0</td>\n",
       "      <td>2382.0</td>\n",
       "      <td>431.0</td>\n",
       "      <td>874.0</td>\n",
       "      <td>380.0</td>\n",
       "      <td>3.5542</td>\n",
       "      <td>96500.0</td>\n",
       "      <td>INLAND</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \\\n",
       "14196    -117.03     32.71                33.0       3126.0           627.0   \n",
       "8267     -118.16     33.77                49.0       3382.0           787.0   \n",
       "17445    -120.48     34.66                 4.0       1897.0           331.0   \n",
       "14265    -117.11     32.69                36.0       1421.0           367.0   \n",
       "2271     -119.80     36.78                43.0       2382.0           431.0   \n",
       "\n",
       "       population  households  median_income  median_house_value  \\\n",
       "14196      2300.0       623.0         3.2596            103000.0   \n",
       "8267       1314.0       756.0         3.8125            382100.0   \n",
       "17445       915.0       336.0         4.1563            172600.0   \n",
       "14265      1418.0       355.0         1.9425             93400.0   \n",
       "2271        874.0       380.0         3.5542             96500.0   \n",
       "\n",
       "      ocean_proximity  \n",
       "14196      NEAR OCEAN  \n",
       "8267       NEAR OCEAN  \n",
       "17445      NEAR OCEAN  \n",
       "14265      NEAR OCEAN  \n",
       "2271           INLAND  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#只适用于死的\n",
    "from sklearn.model_selection import train_test_split\n",
    "#random_state 随机种子\n",
    "train_set, test_set = train_test_split(housing, test_size = 0.2, random_state = 42)\n",
    "train_set.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     6.0\n",
       "1     6.0\n",
       "2     5.0\n",
       "3     4.0\n",
       "4     3.0\n",
       "5     3.0\n",
       "6     3.0\n",
       "7     3.0\n",
       "8     2.0\n",
       "9     3.0\n",
       "10    3.0\n",
       "11    3.0\n",
       "12    3.0\n",
       "13    2.0\n",
       "14    2.0\n",
       "15    2.0\n",
       "16    2.0\n",
       "17    2.0\n",
       "18    2.0\n",
       "19    2.0\n",
       "Name: income_cat, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing['income_cat'] = np.ceil(housing['median_income']/1.5)\n",
    "housing['income_cat'].head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     5\n",
       "1     5\n",
       "2     5\n",
       "3     4\n",
       "4     3\n",
       "5     3\n",
       "6     3\n",
       "7     3\n",
       "8     2\n",
       "9     3\n",
       "10    3\n",
       "11    3\n",
       "12    3\n",
       "13    2\n",
       "14    2\n",
       "15    2\n",
       "16    2\n",
       "17    2\n",
       "18    2\n",
       "19    2\n",
       "Name: income_cat, dtype: category\n",
       "Categories (5, int64): [1 < 2 < 3 < 4 < 5]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing['income_cat'].where(housing['income_cat'] < 5, 5.0, inplace = True )\n",
    "housing['income_cat'] = pd.cut(housing['median_income'], bins=[0., 1.5, 3.0, 4.5, 6., np.inf], labels = [1,2, 3, 4, 5]) # 把连续值转换成类别标签 \n",
    "housing['income_cat'].head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "#根据收入类别进行分层采样\n",
    "from sklearn.model_selection import StratifiedShuffleSplit\n",
    "#n_splits为分成几对\n",
    "split = StratifiedShuffleSplit(n_splits=1, test_size = 0.2, random_state = 42 )\n",
    "#按照类别随机取样\n",
    "for  train_index, test_index in split.split(housing, housing['income_cat']):\n",
    "    strat_train_set = housing.loc[train_index]\n",
    "    strat_test_set = housing.loc[test_index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3    0.350533\n",
       "2    0.318798\n",
       "4    0.176357\n",
       "5    0.114583\n",
       "1    0.039729\n",
       "Name: income_cat, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#验证采样的数据是否符合收入类别分布的\n",
    "strat_test_set['income_cat'].value_counts()/len(strat_test_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3    0.350581\n",
       "2    0.318847\n",
       "4    0.176308\n",
       "5    0.114438\n",
       "1    0.039826\n",
       "Name: income_cat, dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing['income_cat'].value_counts()/len(housing)  # 完整数据集合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def income_cat_proportions(data):\n",
    "    return data[\"income_cat\"].value_counts() / len(data)\n",
    "\n",
    "train_set, test_set = train_test_split(housing, test_size=0.2, random_state=42)\n",
    "\n",
    "compare_props = pd.DataFrame({\n",
    "    \"全部数据\": income_cat_proportions(housing),\n",
    "    \"分层抽样\": income_cat_proportions(strat_test_set),\n",
    "    \"随机抽样\": income_cat_proportions(test_set),\n",
    "}).sort_index()\n",
    "compare_props[\"随机. %error\"] = 100 * compare_props[\"随机抽样\"] / compare_props[\"全部数据\"] - 100\n",
    "compare_props[\"分层. %error\"] = 100 * compare_props[\"分层抽样\"] / compare_props[\"全部数据\"] - 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# 寻找特征相关性\n",
    "填写自己的感悟和理解，更多的特征处理尝试 20分 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>median_house_value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>longitude</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.924664</td>\n",
       "      <td>-0.108197</td>\n",
       "      <td>0.044568</td>\n",
       "      <td>0.069608</td>\n",
       "      <td>0.099773</td>\n",
       "      <td>0.055310</td>\n",
       "      <td>-0.015176</td>\n",
       "      <td>-0.045967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>latitude</th>\n",
       "      <td>-0.924664</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.011173</td>\n",
       "      <td>-0.036100</td>\n",
       "      <td>-0.066983</td>\n",
       "      <td>-0.108785</td>\n",
       "      <td>-0.071035</td>\n",
       "      <td>-0.079809</td>\n",
       "      <td>-0.144160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>housing_median_age</th>\n",
       "      <td>-0.108197</td>\n",
       "      <td>0.011173</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.361262</td>\n",
       "      <td>-0.320451</td>\n",
       "      <td>-0.296244</td>\n",
       "      <td>-0.302916</td>\n",
       "      <td>-0.119034</td>\n",
       "      <td>0.105623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_rooms</th>\n",
       "      <td>0.044568</td>\n",
       "      <td>-0.036100</td>\n",
       "      <td>-0.361262</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.930380</td>\n",
       "      <td>0.857126</td>\n",
       "      <td>0.918484</td>\n",
       "      <td>0.198050</td>\n",
       "      <td>0.134153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_bedrooms</th>\n",
       "      <td>0.069608</td>\n",
       "      <td>-0.066983</td>\n",
       "      <td>-0.320451</td>\n",
       "      <td>0.930380</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.877747</td>\n",
       "      <td>0.979728</td>\n",
       "      <td>-0.007723</td>\n",
       "      <td>0.049686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>population</th>\n",
       "      <td>0.099773</td>\n",
       "      <td>-0.108785</td>\n",
       "      <td>-0.296244</td>\n",
       "      <td>0.857126</td>\n",
       "      <td>0.877747</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.907222</td>\n",
       "      <td>0.004834</td>\n",
       "      <td>-0.024650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>households</th>\n",
       "      <td>0.055310</td>\n",
       "      <td>-0.071035</td>\n",
       "      <td>-0.302916</td>\n",
       "      <td>0.918484</td>\n",
       "      <td>0.979728</td>\n",
       "      <td>0.907222</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.013033</td>\n",
       "      <td>0.065843</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>median_income</th>\n",
       "      <td>-0.015176</td>\n",
       "      <td>-0.079809</td>\n",
       "      <td>-0.119034</td>\n",
       "      <td>0.198050</td>\n",
       "      <td>-0.007723</td>\n",
       "      <td>0.004834</td>\n",
       "      <td>0.013033</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.688075</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>median_house_value</th>\n",
       "      <td>-0.045967</td>\n",
       "      <td>-0.144160</td>\n",
       "      <td>0.105623</td>\n",
       "      <td>0.134153</td>\n",
       "      <td>0.049686</td>\n",
       "      <td>-0.024650</td>\n",
       "      <td>0.065843</td>\n",
       "      <td>0.688075</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    longitude  latitude  housing_median_age  total_rooms  \\\n",
       "longitude            1.000000 -0.924664           -0.108197     0.044568   \n",
       "latitude            -0.924664  1.000000            0.011173    -0.036100   \n",
       "housing_median_age  -0.108197  0.011173            1.000000    -0.361262   \n",
       "total_rooms          0.044568 -0.036100           -0.361262     1.000000   \n",
       "total_bedrooms       0.069608 -0.066983           -0.320451     0.930380   \n",
       "population           0.099773 -0.108785           -0.296244     0.857126   \n",
       "households           0.055310 -0.071035           -0.302916     0.918484   \n",
       "median_income       -0.015176 -0.079809           -0.119034     0.198050   \n",
       "median_house_value  -0.045967 -0.144160            0.105623     0.134153   \n",
       "\n",
       "                    total_bedrooms  population  households  median_income  \\\n",
       "longitude                 0.069608    0.099773    0.055310      -0.015176   \n",
       "latitude                 -0.066983   -0.108785   -0.071035      -0.079809   \n",
       "housing_median_age       -0.320451   -0.296244   -0.302916      -0.119034   \n",
       "total_rooms               0.930380    0.857126    0.918484       0.198050   \n",
       "total_bedrooms            1.000000    0.877747    0.979728      -0.007723   \n",
       "population                0.877747    1.000000    0.907222       0.004834   \n",
       "households                0.979728    0.907222    1.000000       0.013033   \n",
       "median_income            -0.007723    0.004834    0.013033       1.000000   \n",
       "median_house_value        0.049686   -0.024650    0.065843       0.688075   \n",
       "\n",
       "                    median_house_value  \n",
       "longitude                    -0.045967  \n",
       "latitude                     -0.144160  \n",
       "housing_median_age            0.105623  \n",
       "total_rooms                   0.134153  \n",
       "total_bedrooms                0.049686  \n",
       "population                   -0.024650  \n",
       "households                    0.065843  \n",
       "median_income                 0.688075  \n",
       "median_house_value            1.000000  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# corr() 计算出每对特征之间的相关系数， 称为皮尔逊相关系数\n",
    "corr_matrix=housing.corr()\n",
    "corr_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "median_house_value                      1.000000\n",
       "median_income                           0.688075\n",
       "households_per_population               0.264910\n",
       "total_rooms_per_populations             0.209482\n",
       "median_income_per_housing_median_age    0.207899\n",
       "rooms_per_household                     0.151948\n",
       "total_rooms                             0.134153\n",
       "housing_median_age                      0.105623\n",
       "households_per_bedrooms                 0.091680\n",
       "households                              0.065843\n",
       "total_bedrooms                          0.049686\n",
       "population_per_housing_median_age      -0.014854\n",
       "population                             -0.024650\n",
       "longitude                              -0.045967\n",
       "latitude                               -0.144160\n",
       "bedrooms_per_room                      -0.255880\n",
       "Name: median_house_value, dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing['rooms_per_household'] =housing['total_rooms']/housing['households']\n",
    "housing['bedrooms_per_room'] = housing['total_bedrooms']/housing['total_rooms']\n",
    "housing['households_per_population'] = housing['households']/housing['population']\n",
    "housing['total_rooms_per_populations'] = housing['total_rooms']/housing['population']\n",
    "housing['median_income_per_housing_median_age'] = housing['median_income']/housing['housing_median_age']\n",
    "housing['households_per_bedrooms'] = housing['households']/housing['total_bedrooms']\n",
    "housing['population_per_housing_median_age'] = housing['population']/housing['housing_median_age']\n",
    "#关联矩阵\n",
    "corr_matrix = housing.corr()\n",
    "corr_matrix['median_house_value'].sort_values(ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 16, 0, 550000]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZ8AAAEHCAYAAABx10u6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOydeXhU5b34P99zZskGIQREQ1ikgXKBAhUqUpQq9qqtircVqVct/q5tve1V66277bVu19aKtbcu19buVtuKeFsRl9YFRVBRsAkCoqSABFCQEELIMss57++PMxNmObMlkzBJ3s/z5JnkzHnPvDM5837f7y5KKTQajUaj6U2MIz0BjUaj0Qw8tPDRaDQaTa+jhY9Go9Foeh0tfDQajUbT62jho9FoNJpex3OkJ1BIDBs2TI0dO/ZIT0Oj0Wj6FOvWrdunlBqeyxgtfGIYO3Ysa9euPdLT0Gg0mj6FiHyQ6xhtdtNoNBpNr6OFj0aj0Wh6HS18NBqNRtPraOGj0Wg0ml5HCx+NRqPR9Dpa+Gg0Go2m19HCR6PRaDS9jhY+Go1Go+l1tPDRaDQaTa+jhY9Go9Foeh0tfDQajUbT62jho9FoNJpeRwsfjUaj0fQ6WvhoNBqNptfRwkej0Wg0vU6P9/MRke1AC2ABYaXUTBEZCjwGjAW2AwuVUk2R828EvhY5/9tKqb9Gjs8AfgsUA88AVyqllIj4gYeBGUAj8BWl1PbImIuB/4pM5b+VUr/Lx3uq39NCbcMBpo8aQs2IQTQeCrCzqZ3qimIANu5uBoTJVYOpLPPHjVtV/zF+j8nIimImV5XHPR8ler1Sn8nu5nZAKPEabNh9kOa2IIeCFqdPGsHMYys7z9+4u5n3PmphfcMB/F6TL0w5mmGDitj2cQvPbdxDRYmXccPLCIZtfB4Dr2kwqqKYhqZ2/B6Dtz9o4q0P9jNiUBFHDfLT1Bbio4MdVJZ5QQnFXpOhpV5ClmJXczvb97XiNw0sWxEI25SXeBGEsmIPZT6Tlo4Q2xvbUErhM00CYQsF+DwGlaV+QpbCY4CtnGMeA3Y1d1DqMxlW6ufDgx0UeQyKfB6OHlzEiMF+djW1s+tAO5YNgbCFIRCybcJhG5/HZHCJF69hMLTUS6nfA0rYd6id/W1hhhR76AgpDIED7SGUUtQcNQhDYG9LAJ8phCzF/rYgpX4Tj2EyyG/yUXMHIWVTXuSlPWRjCoQsmwNtYXweGFJSRFg5c6go8RIIK5o7AliWUOQ18HtNTAGvx6QjZOExhONGDWFvS5Ct+w5R5PVQXVFMc3uQtqCF1xBagmE6gmEOtDmfmdcAjweUgkFFPizL5lAgjGkIHsMgrGxKfV6OGVzEh83ttIZCWBZ4PMKMUUMpL/by94YDlPpMvB6hI2QTshXFXgMToS1kM3ywj/2tIZpbgxw1yIdpmjS1BRlcZDKkxE9VeTEHA2EmHj2ImqPKONQRZuWWvby5rYn2cJijyoo5+ZPDOXqQn0fW7ODD5g5sBX4PlPg8TBgxiP2tQXbsb0WAQcU+hpZ4GVLiR0TxYXMHew62E7ahxGcysrwYwxAOBcOIEqZUDcYGgmELj2HQEbYZ5DfYeyjIp0YOYWp1OQCHOsI0tgbweUw27W7mo4MBjh8zhLawzVGDivjk0YMp8Rqs2dZIa9BiVEUJg4q8fNjczuaPDnL0YD/HDC5m1T8aqSjxMqdmGBOPHszu5nZ2NbUTCNtMqRpMW8jiYHuYj5rb+ehggLFDi/mgqZ1xw0o5bfLRbPv4ECu37GPu+GE0t4dYVvchx5T7GVTkxe8xCIRtxlSWMvHoQZ3f8aryorjfW4MW1RXFSWtE7PoDpFyLYsdFj5/zwOrOY9vvPLObK2H3EKVUz76AI3xmKqX2xRy7C9ivlLpTRG4AKpRS14vIJOCPwPFAFfACMEEpZYnIm8CVwBs4wudepdSzIvIfwFSl1DdF5HzgS0qpr0QE3FpgJqCAdcCMqJBzY+bMmSpTM7nv/+UdHn5jR+ffJ9ZUsvaDJryGQUfYwrIVduQj9Rhwz8LpzJ8+MmkcgNcUfnzeNOZPH9l57MnaXVz/xHqUrQhY6f83J9VUsmDmKK55vI5QhnM1Gk3fwm8KYgh3nTu1c41wW0einFRTyVuRtShk253jomtKR8hOGpMvASQi65RSM3MZc6TMbucAUS3kd8C/xBz/k1IqoJTaBtQDx4vIMcBgpdTrypGWDyeMiV5rKXCqiAhwOvC8Ump/ROA8D5zRnUnX72lJ+sevqm+kI2TTEggTsg4LHoCwDdcurWPttkbXGyZkKa5dup7GQwHA2Z1Eb5JMggfg1fpGrn6sVgsejaYfErAUHSGb655w1gi39SeWV2PWoui4+j0tKQUPwNgbnu6p6WekN4SPAv4mIutE5NLIsRFKqQ8BIo9HRY6PBBpixu6MHBsZ+T3xeNwYpVQYaAYq01wrDhG5VETWisjajz/+OO0bqW04kP6dumCKwcot+1I/bwg7m9qdCTa14zVy+5coyXlKGo2mD+E1DHY2tee8/ngNg9qGAzmvKb1Fb8xqjlLqOOALwGUiMjfNuW5LqUpzvKtjDh9Q6iGl1Eyl1Mzhw4enmRqdNtZcsJTN3PHDUj9vq05fUXVFMSHbfYeSCtFKj0bTrwnZNtUVxTmvPyHbZvqoITmvKb1FjwsfpdTuyONe4M84/pw9EVMakce9kdN3AqNihlcDuyPHq12Ox40REQ9QDuxPc60uUzNiEItmj447dlJNJUVeg0F+D15TMGJEnseAxQumMfPYyqRx4Ph8Fi+Y2ukYrCzzc9e5Ux1HtZlZpTmpppJ7vjIdbxbnajSavoXfdIJW7jrXWSPc1p9YYtei6LiaEYM61xQ3jmTQQY8GHIhIKWAopVoivz8P3AacCjTGBBwMVUpdJyKTgT9wOODgRWB8JODgLeAKYA1OwMF9SqlnROQy4FMxAQdfVkotjAQcrAOOi0znbZyAg/2p5ptNwAEUZrTbL1/dykMrt4KAbcM3ThrLmVNH8k5DEzct2xSn8pkCV/3zBCYePYjNH7Vwz/Pvk+g2ElzUxCyoLPFQc1RZXLSbAjrC2V9j9BA/Ow4Eko6PP6qUtkC4M9oNpegIWbRbXZhoGvwmBPJwzVI/tCa/jZQIMLqiiKBt82FzMLfX8hnUDCvrE9FuYRtiXRDjh5cStm3XaLfmjhA79ne4flYn1VQSsu2CinZraQ/x59rdmOIEDF1+Sg1TR5b3+2i3rgQc9LTwGYej7YAT1v0HpdQdIlIJLAFGAzuA86JCQUS+B1wChIH/VEo9Gzk+k8Oh1s8CV0RCrYuA3wOfxtF4zldKbY2MuQT4buT171BK/SbdfLMVPoVG46EAc370UpxTschrsPr6eexsaueiX66hJXB49R/k9/DI12cxbdQQ6hoOJD2fCa8pKFsRdrl1/B7h6StO6vziAHz2zpcIhNOr/tGd2V3nTmVOzbCk9+Nc22DxgsMRPOmi/Io9Bu0Jr2lEJGomI4TfY6CUIpji2qV+k0DIQkRSvr4pgAiW7f68zzmBoJXd/yyRz08czvHHVnL3397DaxpYSsVFRQEpF6Ku0JVrpRsTTQ9w26TFUr+nhdN/+mrS5+j3CK/dcGq331c+Sfc9LKR59gRdET49mucTEQLTXI434mg/bmPuAO5wOb4WmOJyvAM4L8W1fg38OrdZ9z2igQodMctq1Enp5keK2pCha34mn8fgwQuP47kNH/GHNxvin1TwxftW4TedcM/LTq7BNDKbBS3L5pb5UzoXz7vOncq1S+sIxEi4QNiJ4KkqL+LaNILH7zG4fF4Ni//2ftzxFHIgCRH4p2MGUdtwMOk5U2DR7DHMHjeMqvIint3wEfc8/36cliiAYaQTTIKtFFFraZHXQCm47OQaILv/yQubP2bFex9jKQhajpp23RPrmVMzjMoyf2d4bWLYbVfoyrWerN3FdUvrMMXAUjaLF8SnFKyq35fxmk/W7uLax+tcBfjlp4wvuAU93few0OZaCBRmGIQmJ9IJmFg/Uqwt2M3PNMjvwe+RjD6kUNiJ57hkzrH4PfHnBixFMHw43PP+FfWErczCLWTDrcs3sXZbI3UNB5hTM4xfLJpJic9MOvf8X7yRUisB+PzEo/hUxAyTiCeLO74jZLsKHgBLwYMvb+Vbj6zjrPtXMbTUhy/hM/CY4EkRYXTiJ4ZiGk4YfjScPhS2UUrx0MqtzPnRS6yu33fY95dmwokfQXShiw3Zj/4frl1ax8r393aG9WeL27Wiob/pxlz1WC2BsKItZBEIK65aUkv9nhbqGg7Ehf+mumb0dd3+z36PwQWzUvs+jhSZNnqaeHq8woGme2Rj7ogKkOtidpI3nTWJnU3tNLUGGVNZyvLLT0yyIUevPadmWKe5p7qimNX1+7hm6XqCKUxllm1z2aNvE7JtJlcN5u0dzZ3PmRK/KIo4u9R7X9rSqQl4DPjGSeP4zertdMS8RjBss+Dnb0Q0AcUlc45N2vWmyleI5YXNe/nOP0/Aa8ZrHx4DpMverHhag462cetTG/F5DMcHFcFrGJ3aSCJrPziQtKBaCixLEbQcM9tVS2r573OmcPyYClbWN2Y9p+hC57YDD4QV33zkbWwX81w6urKb/+WrW5MEY9iGL967yvF3WDaSYO5PvObOpnY8LhqzL2J6LURNwu17GLvR08SjhU8Bk4u5Y/70kcypGcbOpnY27Grm9uWbOqskxPpTpkWclOmuPX/6SCYdM5gv3vuq687TUnT6I2IFT/S5WDpCNl+YcjQXzBodZ+N/bsNHcYIncQzAg69sdUrTmIIBWSXeAvhMg9agxY/Pm8a1S9djGkLIcrQLyXNgoNc0kj6jtpCN32MQdjGdpXrPsYRtuOHPGzKe5zEE0yBSvijMuZ+uZtvHh2gLWa7Cry2YbJ6D9BucXHfzjYcC/HLVNtfngpYd5+NKd80Nu5o5lBD14TOFZ644kZoRg1yvUQjEfg/z4Wvrz2izW4HSFXNHZZmf6opibn96U1yVhI6QHTc+m2vXjBjE3edNS2v2yQa/Kexu7mBnUzuTq8qZO8HJpbr96U1ZjbcUEImYy5boQjZ/+kheu2EeP7voOARF2Catua4rBMIWN589iSKvQWmMiTAaYOExhMtOHhf3XJRir4HfI1mZAt2wbMX3z57MieOHEbTg0Td3sODnb3DJb9/CVo6mV+JNft2olgHOJmTOj17iol+uYc6PXmJZ7a64c2PNsqV+E5/H4KazJqVcVFNpLIkUeQ18priaghsPBVzvj5vnTy5owROlsszPtFFDtODJgNZ8CpSuOi/dxiWOj/6e6tqx5rhL5ozlwVe2dvl9BC3Fv/32TfymgY2jfY2pLM1qgYoSsqPRYekFhwH4XHxatQ3NBPMQOm0AIoIVYzISEc6YfDSzxg5lWd1ufrVqK63Bw59r2FY89Oo23KJKLVtx8/zJ7Nzf1qXPWAG3LtuYJFAduafwewzuWjCVqx+vi4s2jArn2E1I9F5I1IrA2c23dIS59amNeE2D25dvYpDf46qFV1cUZx3Y8cy3T3INJ3a7h0t9JlOq3P14hUY+owz7M1r4FChddV6mi5SKHe927VKfyb0vbuGBFfX4TMdvkWohKfYK7aHkJ0+ffBQvvruX6FqnAMuGNvvw4nbV5yckmVQAfAYEU1ilUplrYrGBRy45vjP/CZyF4IEV9a7nF3mNrHxIHkMQFF+fO45HXt8RFwJd5DF5dM0O/vflekyROMETJWQpvKbgNVRcfkvQUtz85AbMbpQ/sdOkSvhMg1FDS1i8wN0PURcpvZJugxMNib5tuSPk3CLrYqks87M4IvCi/jZT4MITRrNk7c64OaTSYtzuYUupPuG4z2eUYX9HC58CpavOy9hxbj6f6PjEay+cWc2Z973aGdqcLi/nW58bx+xPVPLwa9t5YfPhenimwMvv7SNd6piguPtv7yUd/+4XJjJrXCWlPpNfr96WFMLtNwVLOZpEOrY3tsUJn51N7fhMI+n9fOtz4xgxuIhbnsps/ou+5i9Wbk0SFEHL4oEV9RnzmOwU8w7buPqGsiXdywbCYZrbg50BJbE+N8i8wYkupIZIXMg7pNfCo36PjbubOdgeZnCxh8lV5Sw6YWxcQmQq+qrjPltNUuOghU8B01XnZey4Up/patqIBhXUNhxgbGUJF/36zaQFxg2/x6C6ooRLf78Or2HExY5ZCqwMi3DQUhR5zCSHeFNbkGmjhtB4KMAZU45m6bqGOFOZGMItX5jIfy1LLyz2tx6uDNB4KEBzeyhJa/J7DL5+0jgAblu+KWszUThSOeI3r23vXBT/bc5Yfrv6g4xj8+Fq8hhO0mq2lwrb8K1H3sZSioUzqlmybmfSjjwpSvLMw1GS6aohZ9LCK8ucnlBRLaAjbKGUotjriXv9VCaqvui413k+uaGFT4FTWeZPW4In1Rcz1bgoseYBt9DXRJysf/jSp6u45akNhCxc/UqZuPyUGn72yj+Sjj/4yla2fdzKC5v34jEFhWCKwu8xsZTNwhnV3Pb0uxmv/+Pn32fiMYNp2N/G7U9vwhQhbNl4TaHIY3YufAAbdx90rT5r4nQydGNyVTkPfdVpEdWwv53blm/MSmh3F58pPPPtk9jd3MG/P7w2rnpDKnOlrQ6HhEdL8SfuyJOiJJ/e5NwTYQvDxS9X4jWxUUmaSOL96KYFwOEoyeueWE9LR7jz9dxMVJnu4UJD5/nkhhY+fZDu2pVTLQzpiJqV/vTWzgxnpua40eXMmziCYNh2dbA/t2kPEG9aC0ea8z26ZkdW2kMgbHPpw2uTQpotS/H1k0bz9RPHsap+H3N+9BIG4nrNVIJHgKsfr4v4w2ws205r9uoKBo7vymMIYVtFitUK3z97ErubOzjYHsJO1H3Eac2RqpSPG7E78soyP02tQW5dvolgOOaeSPhw/B6Dn311RlI5HLf7cUiJFyNdXLuCW57aSMhS/cZE1VfNhUeKHu9k2pfoC7Xd8lE/qiv13PJFqc8kbCtCYbsLelP38HkMnrniRM66f1VWgQaJ5LLAS+T1HH+TlVWIt9cAESPJTBgVSJ3zEEHEMWG1BcMosi8dFCX2nomWsUmco98UlEhnqSS3TY7b/eg1nQCNXCMMY2sO9mUGYrRbwdV2G6gkVr3OROOhAK//Yx/7DgU5sWZY5xi362zc3Zy8o1SKnzz/PieMG8rsTzi9gxJvfidq6SAH20O0dIQyOsi7iiHpF8KoGchjONW3exWlqG04gNnFTNNcNAsF/Pi8qYwaWkp1RTH/+ae/82qaagU+U7hi3njud4nMS/yYLKXwiHDucSP57euZ/U0AC2eO5MnaDzFEsCIVMCrL/NTvaeHape5lbMQQnnapjBGLm5/Draad1xRsW6XVXvuLiaqvmQuPFFr45JnEHuuLZo/mtnM+lfL8J2t38Z3HauMW7EWzR4Mi6TozxgzluoRimwAdYcUja3bwyJodCGAaxDl2FXD1ktq8m4jcyHZ9Ng0DW9k579i7Q9BSNOxv6xSAPU1LR5gte1oIhS3e2Jayk0enRvbS5r1ZbwrCtuL3a7ITPN/9wkRGlBfxxLpdnYv/Lcs28t5HB/nTmw2ugsfnMdKGQ0fJpghqidfkZ1+dATglftoSPv9UfiRN/0ab3WLortmtfk8Ln//JyqTjL3xnruuXuPFQgNk/fDHrrHufKTln6Ps9glLdz+xPNP3kgxKvQVsXzF/Zklhnzu8xCFt2XiLPvKYjQFOZ77KtIOc1hR+fN405NcNyuhcgs5YJzmf8s6/O4Ou/eytrM1g0uCHbagLLand1+jncfGFREx+QZKLzewx+sWhm2rYKmsKnK2Y3XV4nj6TqsZ7q+M6mdiQHE1Au50Yxxch6nEsFGMDxRfz2ks9w7WkTcqpMkA6PkTlnpzv4PQaehOrctkpv9snl2j9ZOD2tGS7blwlZinc/PMjG3QdzGOWQzcfXFrL53Wvbsxc8HoO7z5uWUxmb+dNHsvr6eTzy9Vm8dsM87lk43bWKuluF9cULpjJ3wvC0gqfxUIC6hgM5V+TWFDba7JZHUvVYT3W8uqLYtexKKrqipVrKTpv0GcXvMfj2vBr+58UtSTZ7G2jY385PXng/r6a7a07/JD94ZnP+LogTFGCKcPkpNQwt83HLso2d7ycXn006wpZNS0c4ZeKo1xBCObzWg69s5VertrkKCNMApXIPKIjlxZhE4HR0p3BnrJ8jXY5Orvk7umJA/0VrPnnErcf6otmjU36ZK8v83H3eNBKViUWzR7te5+7zpsXtGhfNHu2apwJOK2JnZzmNK+aNTztvv8fgK5+pjmt7EIuynWTMbARPtnqRraCtB6LtLFsRtGx++uL73PbUpjgBkS9Fy1Jw4583pNSichE8UVKZ20zD4G//OZf7/3V6lwuQuuExhEWzR8fdT1GNx03TyFX7SFdcM9vCm10prqvpO2jNJ8/cds6nsi4jAod3gm7Rbm7XSdw1Vg8p4QfPxmsPfo/ws4uOY3JVeWfC3/0rtrgmQ5Z4Te5aMJVrXAIZonhMyShUTMNp4rbivY8xUHRkMPPYCv7nRfeaa/mgu2VrCoWbz55EzYhB1IwYhK2Iq5nWVQzguSsdn86Vp06Iu5/cNA0FR0T70BUD+jda+PQA0cUiWyrL/Jw1LfnL7HadWPNG46EA97zwftK47589mbkTjoobs3jBtKS21OCE7e47FEgbfhy0VKSqdGq8psFfN+1Ne053SQwg6O8snDmSC2eN6fx7Ts0wfrJwOlc+VtstE6IN7G5up6LUl3Q/JSYfX7t0PaAIhHs/GbSQKgYMxNydnkab3fow0Z1hLKlKzzu9bU7l6n+egN/jmFq8ppPzsfiv72UMP547vjLt811J2syFEp+ZvU2vwPnnicOzOm9Z3YedJqZo353rn1gPSuExpLNgbCIeA66cV5P22t94eG1S/55ou41YlHKCVmKJbc2RilzMdKnOzdQCvrfI1PNI0zW05tOHybX0fGWZnytOHd/ZVfQbD68lYEE4i1ColVv25WXOXSVsOwVJeytHp6fwewyOH1fJ81kGAUSLfF6zpDauHQNKubZTKPYa/PyrM5g74Sia2oM8/PqOpHOAiAasuGbpeiYdM5iaEYMo9ZlJmwin2kK81M+kfURNd6Y4HWRvPnsyF54wxlV7yBRQcKQLjOpK1T2H1nwKgK6GkibuDP0e4bKT0+94o+OArPvIGEJend254jOFm8+eFNfErTc5c8rR+D2CN43p0TQEn+mEpYOT21LkNVg4sxqfx+kCGg0t3tvSkdXrdoRs/vz2Tk77yUrcFEs3y5vCKX4Kjv/xfxZOTfsawbDNF+99lWW1u2gNWvgT3qM/UnkhVvu46Syn8rXb/Rq7WLcGnbJC3/vLBq5bWpekPWQbUHAkO4O6WRey0fw0mdGazxGmu6Gk0Z3ho2t28MCKeh5auZUHXq5Pe50na3e5VkpIlRjpMYQ2l8ZxvUE0+z/q+/renzf06uv7PcLUUUN4/t29GOnSbJXi5nOmMKWqvLONxeEq0UIo7GgAc2qG0ZpDlF+25XPgcFWC2EVaDAMR0obbBy3FdU+sZ/nlJyJGvGNNDOGCWaO5YNbow5Wvl6euRL2zqd3Vf7hkrVOQNlZ7eOirMwo+oKCQ/E79Da35HEHyEUoa7TT5wIotBMKZrxN9zVzaAHS3OkJ3OP8z1Z2CZ0pVOcW9qIJ5DDhnehU/fHYzQcvGpflqJ5aC25dvorqimIpSH83tQW5bvjFOA7h52QZm//BFbl+euTVErnhN4Z7zpjHpmMGdWnT0f52NwmgaQm3DAW46a1LKBNHqimJuf3pT2vu1uqKYUBZdZx1tQgp+YS8Uv1N/RGs+R5DuhpI++sYH3Lrc6VmTqtNk9HWi9nK314xSiIFkj65p4MpTJ3QufkntBHqQsA1L12bvXPYaBve9uIVH3tyBR0j6nzh5UiqpkV4+CFmK7zz2d0I2nYEIqUywHuNwe/MorQGLm5dtxFKKm86cxJSR5Uk+Frd7xxRhxea9nDLxqE4hdfPZk/neX9JrqCHbZnLV4IJrQeDmlzrSfqf+ihY+R5DuqPSPvvFB2i94yLbZsKuZrzz0etwXe07NsIyFIAsJy1a8/o9GZn+ikp1N7ZzyyeE8t7FnQ7pjyeWTag2GO81kvd+sgk6/UDRo4L6XtrhqrT+/aAabPmzh/hVb8JgGrRGVLhrMcfvTm1xbdJT6TALheMHZGrS45amN/NeTGzpNcBeeMAYEbn1qUySiUrFwZjVL1u5MEjKFtLCnM4HrStX5RxcWjeFI9POJLcqYTXthSF+QtMTrdP68ZM6x/Hr1trjdd7SI4/L1uztt8H2BL396JM9s+BBTpM9Hu/UmRR4jqakegM808HucIqDnHjeSpet2xt1LflNY8s3PxvXViWrZgtOwz63IbWJfqeg9HPWBpWrpXgjko0/WQEb38+mDuO38MgUhbNzdjGkkZ1z6TGHRZ8fwm9Xb+c3q7UlmH6fL51t09ELb51wwxamikMoP9VTdLtdor75EsVcIWvmrL5cNboIHnPDpaMO6RMEDELAUpTFVZt20bAWU+gxag6lNxpVlflbV70u6lwuxWZyuptD76ICDAiA2lDRTEMKTtbv4xsNraXdZja857ZP8atU2AmE75cKTq+DJVxXrVPg9wnVnTEQhlHjdy2r3dcED0B5SeCIfpTdPn6khUOLr3lfYYzjdVmMp8hqdGmbjoQC3PrUxaZzXNJLK/CSajPtSbTYd1db7aOFTYKTLK0gVqeY1hTv+ZQoTjxnU7bpfiaRrezClKvfqx4mc/Mnh3PP8+wTDNm2h/m1SC0T+N7ZSSQt+V7AVBLopmcO2jVtlpeiiu7OpHa/LXIOR0PF0UWB9KUdGR7X1PtrsVmCk24G5mQb8psGPF07jrGlVrHzfPWveZ0CwC2uUzxRspVJWs96wu6Xb9db+2ovBA4WCpcDKIhw522t1hyvmjWdMZUnKiLPqimLXDYhSijOmHM2sY4emLKLb17SJQgp+GAj0iuYjIqaI/F1Elkf+Hioiz4vIlshjRcy5N4pIvYi8JyKnxxyfISLvRJ67VyId0kTELyKPRY6vEZGxMWMujitjPyYAACAASURBVLzGFhG5uDfeazqyqWSQbgfm9mUOWDZXP17HstpdTK4anFSJwBQn0TAXTIFzj6viP07+RFZtFDIVHe0KRabQA5fVxOA14AtTjo5rBrf6+nlx/sXKMj+Xn5Icsl3i8/Domh2cdf8qbn1qE2fdvyqp5lnsvVzqN/F5nOoIhbyoH8lqCr1BITXm65VoNxG5CpgJDFZKnSUidwH7lVJ3isgNQIVS6noRmQT8ETgeqAJeACYopSwReRO4EngDeAa4Vyn1rIj8BzBVKfVNETkf+JJS6isiMhRYG3ldBawDZiilmlLNsyej3XKtZJAq2m1Z7S7X6tReU3jjxlNZXb+Pa5fWYYqBpWwWL5gGOBnl0Vpb//Lpkfzf2ztTCpZs2jNHMQGVw/ma3mXiiFK2NbbhM006whZKKScvzFKd+UDZ3IufvfPFhMhJAYRAODk6DOJzyx594wNufWojXtPAUko3hDtC9GRjvq5Eu+UkfETkRGC8Uuo3IjIcKFNKbcswphr4HXAHcFVE+LwHnKyU+lBEjgFeVkp9UkRuBFBK/TAy9q/ALcB2YIVSamLk+L9Gxv979Byl1Osi4gE+AoYD50fPiYz5eeR1/phqrj0lfPIdxrm8bjeX//HvSccfvuQzzJ1wlKvgij0G8Nk7X4pbODT9l+9+cSKzjq2kuqKYptYgX7xvFUEXoZHuXkxMCbjs5BoeWrmVlphSQYP8Hr4xdxz/+3J953k3nTmpsypCLq+nyS89HUreo6HWInIzjhbxSeA3gBd4BJiTYej/ANcBsQbhEUqpDwEiAijafGYkjmYTZWfkWCjye+Lx6JiGyLXCItIMVMYedxkT+74uBS4FGD16dOLTeSHfYZyDi70pnnHsVG4JcbHH6hoO4DMNLXyOEF4DDKN7n78v0qo7m63jXc9t5rkr53ZWuPCbRpzwyeZenD99JJOOGdzp36ko9fHAy/HNAIOWHSnzdLj3z61PbcTncQ866Cnho3vvJFOIoeS5OAO+BMwHWgGUUruJFyhJiMhZwF6l1LosX8PNyq/SHO/qmMMHlHpIKTVTKTVz+PDs+qzkSr4dr26+HY/hHO/qfDS9R8im24L/3Bkj8WTpFAvb8MX7VvHQyn/w9x37CYTj6y9kcy8+Wbsrzr+zun5fkm/y8lNq8Jnx4fJe00jKI+rJoAPde8edQgz+yEX4BJVjo1MAIlKaxZg5wHwR2Q78CZgnIo8AeyLmNiKP0ZCnncComPHVwO7I8WqX43FjIma3cmB/mmv1OvkO46ws83PPwun4PU5ujN8j3LNwuuv13ByM0fkk7kg1fYc/vrUzp7D6YNjmB89s5pan3iVoOTuzbO/FVPk6c2qGxQUqXDBrtGt/qZvPji9WetOZqVsydIe+lFfU2xRiKHkuodZLIn6TISLyDeAS4BfpBiilbgRuBBCRk4FrlFIXichi4GLgzsjjk5Ehy4A/iMg9OAEH44E3IwEHLSJyArAGWATcFzPmYuB1YAHwklJKRXxBP4iJpDstOpcjQb7DOLO5XqqmXtHxk44ZzBfvffWIVq3WHBkU8MMvf4rZn6jMeC+mM9kkRobdde7UuICXqFP7jMlHH27J8HTqlgzdoRBNS4VEoYWSZy18lFJ3i8g/Awdx/D7fV0o938XXvRNHmH0N2AGcF3mNjSKyBNiEU5vxMqVUNPPwW8BvgWLg2cgPwK+A34tIPY7Gc37kWvtF5Hbgrch5tyml9ndxvnkh38UJ010vdhcY5Xt/2QACF85yBFDNiEHcfd40rntiPQbS75M8NfF0hKys7sdcTDbONkYizaHizYLRNhOxPqFUXUG74rcpRNNSoVFIBVJ1YdEYjkRh0VzJ9ktZ13CAC37xRlIhTp/H4PUb4iNc6ve0sKp+Hz98drMOQhhAvPCduUmJoalIVQA3llQRVdGIN0OEtoT7cZDfwyNfnxVX7607IcHZzFOTf3o62q2Fww57H060W6tSKjsvtyYr0gmXXL6UqZp6eU2JM0PEXTNPWfea/DJxRBmb9xzKaUxiV9rE3K1Fs0dTUeqjruFAVtpFNiYbt7I5SiluXb4pLroulnT14DJpR12dp6YwyMXsFrdFEpF/wUkGHZDkM5wzeq109vD6PS1c+3gdQeuwyeLapXUMKfEyuarcNbT6mtM+yQ+e3Rx3vCNoEQpbrHz/Yw62hzrbabs1l9M4CGCIYOXRSmBGLFPZ1Hl9L0fBA06VcEPAZ5oELZvLT6lh9rihbG9sY/qoIWz88CBzfvRSThpCJpNNqc+M03rAaahX6jMIJpxb4jWxUSnrwXXHb1NIpiVNarpc200p9ZdIdYIBRz4zhaPX8hjCoUhTr8Qd36r6fVy7dH1y6fuw4puPvI3tkjX+ZO0ufvz8e0mvZylY8PM3ko5r3PFHpMS/fmZ0Z6O4fJBLjEdXRJ7PY/DghcdR2+C0WH9o5VYeeLmeu86dSkWpr1vaRSpagxZ+UzoLqIJTVzCUUP7C7zH42VdnMLlqcNLrab/NwCHrWFsR+XLMzwIRuZPC7Lzco+QznDP2WlHBE4vXMNi4+yDXP7E+pdmiLWglzSFV9WtN7gQsRSCs+F0eBU9vELIUVeXF/O/L9QTCKu5e3bj7YI9Um66uKEYS2kUYppEUar14wVTmThjuKugKMSRY0zPkovmcHfN7GKfkzTl5nU0fIJ/hnG7XisXZAaq057jNYWdTe4/34Rlo9DUxfvPZk2gNWkn3jimONyhRuwhaVre1i6jgcHP4R0Ot8+Vf0vR9cvH5/FtPTqSvkE+zQKpKA6U+s7MA4+Sq8qyqEYRsm1KfSV3DAUp9ps7dGcAsnDmSC2eNofFQIOneaQ1aNDS1c9e5U7lqSW1ncVlbwer6fd2ODEslOHL1w2i/Tf8nY6i1iNxHmo2fUurb+Z7UkSLbUOt8hnMmXuumsyYxpao87ovrVLJe7xoG7TMFwxAWzqxmydqdndeZP62KJWt3Jp2v6f/EFox0a4Fd5DVYfvmJnHnfKteq1HrR1+RKT4VaF3biyxEgn2aBbK41f/pIhpT4uPThtUntsRXwyCXHc9Gv34xzIP/l77u4cl4N975U3+dMRppuYiueqtvN2dOqmDKyHI8hcQ3hLFtR61JcVlcD0PQmGYWPUup3vTGRvkY+zQLZXGty1WBsFy3VEGF7Y1uSbT9oKR54WQuegUiHpbjlqU3c8cy7/MfnPpHUiTRkKSpKvF02H+uq0Zp8kEu023ARuVtEnhGRl6I/PTk5zWGieTuJBMI2YytLaA+Fk57TxQoGNiFLcf+KetfnmtpCXYoq01WjNfkil2i3R4HHgDOBb+IU8/y4JyalcWfWuMqkPAq/KbSFbEQSc9o1mtT5RNNHDaFmxKCczMfdrT6g0cSSi/CpVEr9SkSuVEq9ArwiIq/01MQKncTOoBt3HwSc3IrdzR0cbA8xuNgbl0iXOCZ+fHNkjI+q8iJefm8vaz/Yz4wxQ/nycdVUlvkP51HErigibP34EB5xOu5pNJlYNHt0Z023XMzHG3cfxEhok5XJT6RNdIVFIf0/chE+0bXtQxE5E6c3TnWa8/stsRUOOsIWYSt1R0mPAfcsnI6CzjHtoTAiQpHHpD0URiFYtvsVntu4lzue2cy9509n/vSRzBxTwar6xs7nA2GbW5/apHUeTVb8v9ljuOWcKTmPe7J2F9e5RFym8xOlqwRSSIvgQCGflVnyQdZVrSNdSV/FadB2HzAYuFUptaznpte7ZBNq7Va5NxM+E0S61zbZI/CnS0/QpXE03cLvEV674dScFvxU97zfIyxeMM11AUtV4Xr19fNYVb+voBbBgUC6/0c+hH9XQq1zaWW5RinVrJTaoJQ6RSk1oz8JnmyJViXIBcHA7GbFAQX8ddOebl1Do/F0oYyO2z1f4jP5xaKZKYWG2xinXFSz7jZ6BEj1/+huSaXukMsq+pqI/E1EvhbTHXTAkaoqQToUdkqzWrZYCn67elu3rqHpn/jM7Dc2HeHcy+i43fO2UkyuKs9pjPO3FNwiOBAoxIKtWQsfpdR44L+AycA6EVkuIhf12MwKlMTCh15TSPfV9xhw93nTWbzg8BiP4fTVif6ejVbkNQU3S5+u4Daw8RqwYEY1nmy/yV3YA3Wl2GeqMZOrBhfcIjgQKMSCrV3qZCoiw4B7gAuVUmbeZ3WEyKWTaXej3Zpag9Q2HGD6qCFUlPqSot3ue2kLT9Z9iM8AGyeZNJgQYv3tU8cz69ihXPDLNwmmaAQ3pWoQsz8xjNMnjeDY4WVs3N3Mi+/u7XNVmjWp8ZpCKMtafm6dQ7OlK0ECbmN0t9EjR08FenTF55NLwMFg4EvA+cAngD8DS5RS63KdaKHSW220M0WdZBvUUOZ3mnclZrDHcu1pEzj/+NFxAvCzd76o2y0MAEyXBnhdCTjoCXS0W/+iR9toA3XAX4DblFKv5zQzTSeZEvUaDwVYsXlvpPR9etx6ACXy4Cv/4L4V9Z0FSxv2t4HSCan9HZ9pYCs76d+cr+rV3UVXrdbkInzGqTRqkojcp5S6Ig9z6tNk2tGlS9R7bsNH3Lp8E15DaA26CxZDnLDrYJYxD1EB9b0/b8AA3Sy7n+Mz4Yp5E5g2agiXPfo2LYH4skshS+mqBJqCIJd+Ppm2ynO6OZc+TyZzWqpEvfZQmDVbG/nBs5sBkvrdx2IrCHZRadGCp39T4jP52UXHMXfCUTQeCqT0A5qG6OrVmiNObgkrmpRkaq99uLV18oIgIiz+6+benrKmnxEb/lxZ5ufyU2pczwtZSkeXaY44WvjkiUxJXOmSU72mgZlj4qqm/5ND+g4+T3Lo7NBSn+u5N589SWs9miNOPle8AZ1ykimJK11yqmUrxzncRfwe4btfmJhTsqGm8PF5DHym8Pl/Oir9eabwzBUnxtVNW/n+x9y2fFPSud/9wkQunDWmR+ar0eRCzsJHREpTPPXTbs6lT5MpiSv2+SKv87H7TMFnCudMr0LFyG7BCSzIBkPg+2dP5tLPfYK7z5sWl8jazYo+miNMe8gmaClWbtlHqS/5q1riNSnyGtx93rTOKtXRfjvf/P26JBNvqd9k4jGDqGs40GfK2TQeCvSp+WqyJ5c8n88CvwTKlFKjRWQa8O9Kqf/oyQn2JvnI88kU7RZ9/qV39/C/r/wDr+H048mWb5x4LL9atS0ueMBjwJrvfr4zVHvj7oN84+G13SpkqikcSv0mobAdn2TsMfjFoplJSczp8sO8pmAI+EyzTyR3FloVZk1qerqw6E+A04FGAKVUHTA3lxcbCFSW+Zk2akhKm3plmZ8Nu5r56Uv1hCyVk+AB+M1r25Ki1sJ2tMKCc/3yYi9ebYLrN1i24uazJ1PkNSj1m/hM4ZI5Y+MED6T2K5b4TPweA6UUgbDqEwU9MwXwaPo+OZndlFINCYcyZzlq4mg8FODWpzZ2eXwqRfXVLR9Tv6cFcPxLQV3BoN9w05mTuPCEMdx05iQ6QhZBS/HgK1uZ9YMX4tpYp/IrXjx7DL9YNJNib3xmRSEX9CzEKsya/JKL8GmImN6UiPhE5Brg3R6aV79lZ1M7XjP9x57OwZyqhNcvXt3G53+yku8/+Q6VZX7mjq/szjQ1BcT+1iCNhwLctnwTsak7YRuuebyWle/vpfFQgMoyPzedNSlp/G9e205VeVGfKuhZiFWYNfklF+HzTeAyYCSwE5ge+TslIlIkIm+KSJ2IbBSRWyPHh4rI8yKyJfJYETPmRhGpF5H3ROT0mOMzROSdyHP3ijj1Z0TELyKPRY6vEZGxMWMujrzGFhG5OIf3mjecyKO9rHz/YxoPBaiuKE6qtxWL32NwwxkT+dbnxiVVuxacKsbpePj1Hazd1sir9fvyMHtNIXD/ii1s3H3Qtfp50IJvPvI2c370EstqdzGlqpxSX3ytX69h0Bq0Cq6qcToKsQqzJr/kUuFgH3BhjtcPAPOUUodExAusEpFngS8DLyql7hSRG4AbgOtFZBJO4dLJQBXwgohMUEpZwIPApcAbwDPAGcCzwNeAJqVUjYicD/wI+IqIDAVuBmbiVLhaJyLLlFJNOb6HLvNk7S6uebyus+JwtKX2XedO5bon1mOK0BGyMAynpXbItlk4s5qz7l+F1zDwGHDp3HFs2NnMq/WNKHBtq5DIcxv34DNNAuFw5pM1BY/PNAGVsidUW6QU03VPrGf55ScmbW6iGsO0UUOYUzOszxT0nD99ZJ+aryY3col2uwv4b6AdeA6YBvynUuqRLMeXAKuAbwEPAycrpT4UkWOAl5VSnxSRGwGUUj+MjPkrcAuwHVihlJoYOf6vkfH/Hj1HKfW6iHiAj4DhOELsZKXUv0fG/DzyOn9MNcd8VrV2qke/lBRxFq0qDMS1ZNjZ1E6pz+Ss+1fFRSv5TGd3mws+08Cy7ZQmOk3fwmcKz3z7JDZ9eJCrYzYziUTbJXzQ2KpbFmh6lZ6uan2aUuo6EfkSjtntPGAFkFb4iIgJrANqgAeUUmtEZIRS6kOAiACKOjlG4mg2UXZGjoUivycej45piFwrLCLNQGXscZcxsfO7FEejYvTo0Wk/gFzY2dTuaiYxxXGaJkbEVZb5qWs4gNcwOqtdg9OCO9eqbKlqemn6JoYhnHX/Ku46dypv3HhqpPdTmKsfr4vb3ORLw9HtDjS9QS7Cxxt5/CLwR6XUfsmi7H/EZDZdRIYAfxaRKWlOd7ugSnO8q2Ni5/cQ8BA4mk+aueVEdUWxq5nEUqmdpm5OVqXLgQ4oTIESn4egZRO2HO01qglf98R6Vl8/j7kTnL2arVSShhOb1NwVwaFzazS9RS4BB0+JyGYcH8qLIjIc6Mh2sFLqAPAyjq9mT8TcRuRxb+S0ncComGHVwO7I8WqX43FjIma3cmB/mmv1CpVlfhYvmBqXb+MxYPGCaWlzgG46cxI+UyjxGfhMg6+dOI6FM6uTzo1VqnROT//BNAwuOmE0N35hIsUugQOxocbzp49k9fXzeOTrs1h++YmMqSztVh6Mzq3R9Ca5BBzcICI/Ag4qpSwRaQXOSTcmIqBCSqkDIlIMfB4nIGAZcDFwZ+TxyciQZcAfROQenICD8cCbkddrEZETgDXAIuC+mDEXA68DC4CXlFIq4gv6QUwk3WnAjdm+33wQdZhu3N0MSFJSYCJP1u7i9qc3ISK0RRr2PPjKVrxmpHabRxhWVsTsTzhh1LGtu0//6Uq0ta3vE7RsHnxlKyVeIykBOWDZSZFslWV+VtXvy4u2Es2tiTX7RgWeNr9p8k3WwkdEFsX8HvvUw2mGHQP8LuL3MXDabi8XkdeBJSLyNWAHjv8IpdRGEVkCbALCwGURsx04gQq/BYpxotyejRz/FfB7EanH0XjOj1xrv4jcDrwVOe82pdT+bN9vvqgs83eaSdIRu+tMJGQpfvz8+7x2w7zORaB+Twt7D3YwfdQQWoMWRR4zZQM6Td+jLS7oRAhaClGq0/cTW0Q0XWfcXNC5NZreJBefz2difi8CTgXeJo3wUUqtBz7tcrwxMt5tzB3AHS7H1wJJ/iKlVAcR4eXy3K+BX6eaXyHhtuuMJbYB2Pf/8g4Pv7Gj87n5046mI6wFT38lWtMtYClI6ESaT20lmluTyo80ENHBFz1HLma3uBbZIlIO/D7vMxqgpGu5AE59r+qKYur3tMQJHoBldR/19PQ0BUSscMm3tqJzaw6jgy96lu7082nD8ckMSNxKvdfvaWHp2gbq97S4Pp94LPp3/Z4WNu5u5vyZo5JeB5wIqGhXytqGAz34rjR9gVjhklgJwO8RLjvZvYNptmQqjjsQ0MEXPU8uPp+nOByqbAL/BCzpiUkVOm47orXb98dpJIZAqc/T+byCuDELZ1SzZJ2TupSqBD449n4FPLRyKw+8XM/pk0f08LvTFDoLZ1bHCYaotvLomh08sKK+817RO/Wuo4Mvep5cKhx8LubPMPCBUmpnqvP7ItlUOHDrmRJ1CKfC7xFAdH8dTV4o8hqsvn5e3CLodl+6nafJDv155kaP9vNRSr0CbAYGARVAMLfp9Q/cSr1nSrY1xXCtdpANOoVHk4hbawHdgiC/6MKmPU8uZreFwGKcRFEB7hORa5VSS3tobgWJaxWCDNqjpWxQXZMiuj6bxhCILZbhFlCgw6Tzjw6+6FlyCTj4HvAZpdTFSqlFwPHATT0zrcLFbUd093nTWDQ7vi6cIXQ+v3jBNBYviB+zaPZoirwGRZl6JGgGDIY4LTVSHU+3A9c79Z5BB1/0HLn4fN5RSn0q5m8DqIs91tfJpaq1W/x//Z4WahsOMH3UECpKfUnPJ46J/t2wv5WrH1+vfUIDHENg0ewxLHlrJ22hw3lbg/weHrjwOMqLvRl34DovRXMk6Omq1s9FStZEWxJ8BaevzoDErXBjzYhB1IwYFHdONJw6uhi4LQgTjx6c0XSn6f/YCn772gdJtfoClk1VeVHcvZWKrhYU1Wh6m1ySTK8VkXOBOTg+n4eUUn/usZn1A9xCsqM25A27mrn96U2dz31mzFBWb2080lPW9CBeAyw7c4MMy1L4PQYiThi+W1kdjaavk7XZbSCQ72ZyiaGaHsOpWmwKSUUj/R5Dm900ABR5HD/iVUtq40L4daivplDp0VBrEfmyiGwRkWYRORipMn0w92kODNxCX8M2BMJ2kuAB8JjChKNKe2t6mgLGVjb7DnXgNd1Dp92qZ2g0fY1cfD53AWcrpd7tqcn0JzLVakukNWDx/t7WHpyRplDwGIIpQsClB4YACuHuv72fVKU8ZNts2NXMVx56Xdcb0/R5conz3aMFT/bEhr4m9mDR9H+KvUbKJn9hWyV1qPWawg+/NAWfxyBkKQ4FDgueUp9JkdfgpjMncfvTm/pVvTGtxQ1cMmo+IvLlyK9rReQx4C9A552ilPq/HppbnyeapLZi815ueWpj3IKi6d9cf8ZETqwZxhk/fZVwQjt1nylcMW88D7xcH6fBjKksxWcm+/7+ZfpIrjptQp+oN5ZLqLeuGj2wycbsdnbM7204HUGjKEALnzRUlvk5ZeJR3Pjnd470VDS9hCkwpWowu5vbueiE0fz2tQ/injcM4YJZo7lg1uikvK+giylu6dsNXHXahLxXMch3TlAuwiSfTfA0fZOMwkcp9W/ZXEhEblRK/bD7U+qfuEUVlvgMbAWnTTqKp9/5CK8IllIsnDmaMZUl3P23zejmpH0PBSz4+Rtxx0wDijwmllLcdOakzkV/2qghnedUlvm5ZM5YHnxla9xYn2mys6mdaaOG5K3ZW761jlyFSV/Q4jQ9Sy4BB5k4D9DCx4WdTe0Uez20BMKdx0p9JrfOn8yb2/ezZK1THNyKdKxYuq4BMYSvnTiOn6/ciq2j4fsUbv8vU+DBi46jYX97XH5X7KL/ZO0ufr16e9LYWO0mtt5Yqc9pnd54KJDTgt0TWkeuwkTXotPks7CYrr+cguqKYoJWvApjKcXYypJOwRNLwFJ0hGx++aoWPP0Fj2ECkjJgICoQEv09fo8kaTeVZX62N7Zy1v2ruOiXa5jzo5dYVrsr67n0RAXsXIWJrkWnyafmo5fJFKyq3xcnRDwG3HXuVLY3tqUdZxgG5BCurSlcLGUDKkk7MA3pXPTNhNYcJT6Tn110HHMnHBV3vCuaS6x/pye0jqgwycUkqKtGD2zyKXy05uNCdKEIxWSqm4bBnJphNLWmb4lka8HTb/jKZ0Yxuao8adFvDVhs2NXs/J7g4LOVYnJVedK1cjVxufl38uU7iqUrwkTXohu45FP4PJ7Ha/ULGg8FWLF5L56ERnKmIfx61Vaa28PMHFPO2g+aXcfbCqaOHMT6XS29MV1ND7Jk7U6uPHUCN501ie/9eUPcc7ct34jb3u2mMyfFVT+PLuiZNJfY8wFXLWn19fNYff28vGsdicJEV9nWpCKXZnLDgW8AY2PHKaUuiTz+IN+T68tEd5umSNKOti1o8cDLW1OMPIyl0IKnnxDVTKZUlXcGCkQxxXC1G+xvDaaMSkuluSSef9nJNSm1pJ7uU6PzeDTpyEXzeRJ4FXgB0AHAaYi1yWsGBt848Vh+sWpbyudjNRMrIezeUjbK5Va5f0U9oAiEVZJvx83E5eYLil4j1Vx6Cp3Ho8lELtFuJUqp65VSS5RST0R/emxmfRi3aCJN/yFRSTEFPnn0IMr8yWWUSqKlcc6a1BlYkBjltXjBNK6YNz5prGmIoxXFEBuVlthl0+2+85kGl58yvtejynoiok7Tv8hF81kuIl9USg3YBnLZkmtRUU3fIjGs01JQUeJNKqPj9wg/i+b2LI/P7Un0tzQeCnD/ivq4UGvLVkmvlk5rSeULSlVNoSd9MTqPR5OJXLbnV+IIoHbdUiE9iTkMqQpMavoPTW0hV41mclV5Um7PtUvr2Li7OW7hryzzs3hB4vipLF4wLWutJV3uTKyW9GTtLub86KUu5Qhli87j0WRCN5OLIZ/N5OBwpE+pz+SMn65E94rrv7zwnbnUjBiUpFHUNRzgol+uiatuAY45zlYqyQnvppHkqqWkO9+tyWFPNqnT0W4Dg640k8sp1FpEKoDxQFH0mFJqZS7XGEhEd5x1DQfwmZJkltH0D86ccjQVpT4gOdQ4lQm2LRLtluiEd8t7yTUXJt35vV1TTefxaFKRSyfTrwMrgb8Ct0Yeb+mZafUvNuxqpi2kBU9/wgA+O64SrwEr3vuYz975oqv5Ktb8VOJNDkiIdcL3Rm8b7YvRFAq5+nw+A3yglDoF+DTwcY/Mqh/ReCjA7U9vOtLT0OQJQ+BbnxvH374zlze3NxKyoS1kEQgrrlpS6yo45k8fyerr5/Gzr87A74n/ykUX/t7ww4D2xWgKh1yET4dSqgNARPxKqc3AJ9MNEJFRIrJCRN4VkY0icmXk+FAReV5EtkQeK2LG3Cgi9SLynoicHnN8hoi8E3nuXhGnEJaI+EXkscjxNSIyNmbMxZHX2CIiF+fw0AVOfgAAIABJREFUXnMi3Y5Vh133L2wFv3ltO5s/OpjkwwvbsHG3ewxOZZmfuROGJwUV3HXuVOBwFYLe6FAaFYaPfH0Wq6+fl9fET92ZVJMtufh8dorIEJxOps+LSBOwO8OYMHC1UuptERkErBOR54H/B7yolLpTRG4AbgCuF5FJwPnAZKAKeEFEJiilLOBB4FLgDeAZ4AzgWeBrQJNSqkZEzgd+BHxFRIYCNwMzceJV14nIMqVUUw7vOSOZsrh12HXfxhQnlDoWy1Z8kKIo7MH2UNrruSWH1jUc6PXeNj3hi9EVDTS5kPWWXCn1JaXUAaXULcBNwK+Af8kw5kOl1NuR31uAd4GRwDnA7yKn/S7mOucAf1JKBZRS24B64HgROQYYrJR6XTnheQ8njIleaylwakQrOh14Xim1PyJwnscRWHkjNos71Y411sxR5HU+br8peLUy1Ce48QsTk46FLMX/vPA+ppEcQn/147UZTWaJyaFuG5SOsEWpL9lHFEshaRnZfBc0mlgyaj4iMlgpdTCiSUSJ9oQuA/Zn80IRc9ingTXACKXUh+AIKBGJ1owfiaPZRNkZORaK/J54PDqmIXKtsIg0A5Wxx13GxM7rUhyNitGjR2fzVg5fMMvIIbcGYL97bRv/9/dMiqPmSOI1haPL3R3xIRtMUfhMI671dSCsci4jE92gXLWkttOUF7IUZ/x0JfcsnO6qPRSalqE7k2pyJZv99x8ij+uAtZHHdTF/Z0REyoAngP9USqVLTHXLxlRpjnd1zOEDSj2klJqplJo5fPjwNFNLJpfIoehut2bEIKorinmqTgueQsZnGvz4vGkMLvamPMdScNbUo5Oi2LpSRmZOzTDMBN9g2IZrl9YlaQ+FqGXoKDpNrmQUPkqpsyKPxyqlxkUeoz/jMo0XES+O4HlUKfV/kcN7IqY0Io97I8d3AqNihlfj+JV2Rn5PPB43RkQ8QDmONpbqWnkjumP1e4Qir4HXgH/77FgA1m5r5I6nN/HHNR9Qv6eFle/vZeX7H3cm3Rmiqx4UKqaAiLNPmVw1GE+ab8nT73wUaRR3mEyLrpu5bGdTO2753qYkCzK3IBZDhI273Vtz9AY6ik6TK9mY3Y5L93zUp5NirOD4ht5VSt0T89Qy4GLgzsjjkzHH/yAi9+AEHIwH3lRKWZFyPifgmO0WAfclXOt1YAHwklJKichfgR/ERNKdBtyY6f3misKJgApF7CUPvrKVB19J3S7BFDhz6jEEEr3YmoLBUmBFzGerr5/HPQunc83jdQRd/memAd/63HgeeLk+q8ZsqcxlpT4zznwXJewiyNy0jLagxTceXsviBdOOmPlNdybV5EI20W4/jjwW4USO1eGYtKbiCIIT04ydA3wVeEdEaiPHvosjdJaIyNeAHcB5AEqpjSKyBNiEEyl3WSTSDeBbwG+BYpwot2cjx38F/F5E6nE0nvMj19ovIrcDb0XOu00plZV/KlsaDwW4bml8l9JMWAqW1X2Yz2loeggDYePug8yfPhJD4OoldUmbhragzdBSX2eh0KhPr/FQwLW0Tao2A61BC78pSde/Yt5414oHd507lWuXro8rRNoVf1NXSFcyR1c00GRLRuETSShFRP4EXKqUeify9xTgmgxjV5G6vfapKcbcAdzhcnwtMMXleAcR4eXy3K+BX6ebY3fY2dTuGvGk6R+0hRxt4iufqeaxtxpSaqu3P72JM6YczfbG1rRBAOmc8tUVxYgRH9ft9wgXzHIPgpk/fSRDSnx88/fraAsdbq/V007+Qgt00PRdcgn4nRgVPABKqQ3A9PxPqe9QXVEcKXuv6a8EwjYPv76DQDj1/9lrGGzcfTBlEEDUx1PqM1M65d18JosXTEsrRCZXDcbuxUZxhRjooOm75JJk+q6I/BJ4BMfVcRFO3s6AJVoG/+rH63IyvWn6F45AUa5azaNrdvC/Mf6ghTOrWbJ2p6t/aP70kUw6ZjC1DQeYHomMTEdUYLm10+4JdDi1Jp/kInz+DcfvcmXk75U4VQcGNFEn6+v/aOSKP/49OZZb0+/wmoIh4DPNzgV/clV5klYTtGweWLElrg32Y2818OjXZrG9sS1JwHTFpNWbTn4dTq3JJzn18xGRYmC0Uuq9npvSkaM7/Xzq97Rwxk9f1W0T+hleQ/jScSNZVrc7TigkJg1XVxSzun5fnBZy2ck1PPByfVzvHACPIRR7zTgB09t9drrKstpdSZqW9vloerSfj4jMBxYDPuBYEZmOE0E2P7dp9j+erN3FtY/XacHTDzEF/vL3XXzr5E8wqqIkTltZVb8vSVOJbY/d1Brkx8+/n3TNsK06m8tFo9P6iklLh1Nr8kUuZrebgeOBlwGUUrWxFaQHKlEnrFsOiKZwmTG6nHU7MidldkT+rz99sR6fKRiGdGo+bmHTyy8/nHnQGrTwGk4pnlTERrv1FZOWDqfW5INchE9YKdUsOjOf+j0t/HXjR4BiTGVZ0o5VU/i8s+sghjgJwtkStBRYTi7NQ1+d4fp//+K9r+L3OCa1m86ahJHhRRKj3XoreECjOdLkInw2iMgFgCki44FvA6/1zLQKl+//5R0efmPHkZ6GppuEbZWT4InFKW0jSRUJov6aoOWY1G5fvonvnzWZm5dt6CwYagiYhlDkMV2j3bRJSzNQyEX4XAF8DwjgFBv9K3B7T0yqUKnf06IFTz+hO+65kG3TsL8NK8ZM5jEEU4hLRPUaBlNGlrPmu59n4+6DHGwPMbjYS1V5UWeQgq4QoBmo5CJ8JkV+PJGfc4D5OGV2BgS1DQeO9BQ0ecJjgG2Ts7HU7xFuOnMStz+9Ka6TqSEKxCC2cHqsSa2pLZgUnDBt1JC8vBeNpi+Si/B5FKeczgZy/872C6brxaLfELYdE1guiVklPpOfXXQc5cW+JH+P3+Ph0rnjXAuMpqvpprUczUAlF+HzsVLqqR6bSR+gZsQgFs0ezcOva9NbfyBX05utFJOrygFcI9MumDWaC2aNTvLZ9JUwao2mN8kp1DpSXudFHL8PADE9egYEt53zKRadMDYu2u3G/3unM29D078QoMzvSQoOSBeZlihQ+lIYtUbTW+RaXmci4OWw2U0BA0r4gKMBRRMNGw8FkhYWTf/AI3DdGRMZWupLKoWTS2SaDqPWaJLJuryOiLyjlPpUD8/niNLV8jrLanelbDam6fuU+U3Ctsq6lEyqfjfp+uBoNH2ZHi2vA7whIpOUUptynFe/J1qN+Iv3rSIY1lpQf+NQwOmXk02QQLrioDqMWqM5TC79fE4EakXkPRFZLyLviMj6nppYX6LxUIDWoMU1p03AZxr4zSM9I40bhjgVqQf5/3979x4dVXnucfz7m5kkRoKIUVFuRQUvQAU0VTlUW0GtVqvtEe+tnl4OS5dSe5b3uqrHumy1WHtRlz1WPerRahVboVprqdSqHG9oAUFb5dSqARVFvICQy8xz/th74mQyM5kJyeyZ4fmsxSLZ2XvzTMLk3e/7Pu/zJmhIxIiXWKwjnSSQj+9301N6L6Mt+Xvgciul53P4gEVRxeYtWcX5c5dhRo8V766ypAzqBNefMgUQZ975fEmJIu3JZMEkAc9q6853PXWFFN34mNlrAxlINVq7vo1zfSO5qpKIxxjSWJ8zAw2CKtZIOXeoPevgcQUbEc9q+4SvbapMlTTvWMqwm8uyYvUH3vBUmY6k5dy2uiER45xDd+eZiw/h4bMPpD5rTK4hEePk/UcXvHeurbC31Ky2dC8wU2/Dlm5gzVuyimlXLeSrNz3NtKsWMn/JqkjjKWXYzfXgFb6rzREThvVayLO5qYGrj5tUdGp05tNkoRTsSnrqHGjeC6wsldgT9cZnM0wYvg2JGHiCW/V4aMXbXLK+rdc3XLHrePLNa2Sfv6XNf/japspSifOR3vhshuamBq45fjLnzV1KXDE6UykO22tHHlj+dtShuTzq4up6w2U2CO3JJGcdPI6T9x/drQdU6I1Z7NNkJT51loNvEVE5KrEn6o3PZsp+g61Y/YE3PhUsmQrmfHI1CD9e8DLX/Xklc2YW1ysp9mmyEp86y8XXNlWGSuyJeuPTDzLfYBOGDyERE52bs2GM6xfxcBPRWExsVRcjGVYpaG5qYOkb7+fcibStM8W59y5l/M7bdCunk0uxT5OV+NTptjyV1hP1bLc+WLu+jcdefofHXl7TtXguOLaGXz7+j5xpuq78khYUH0zE4IZT9mHRBdO7ejT5Uq0h2C77i9c+0Ws2ULHZbVtyFlwtLDKthdeQ1tzUwKRR21bE/z3v+ZRo3pJVnHPPkq4kg7q4OHSvYSx46W1Pu65Q9fE4QxrrAVj6xvvdUq3Pm7uUts6eP7f2zlRR8zLFPk1W2lNnOdRCkkUtvIZK5Y1PCdaub+P8uUu7Zbd1JI3fL38ruqBcr9qTSZav+oATbnyyR3LBJUdN4JJ5y8n13FDsvEyx8xpb0vxHLSRZ1MJrqGTe+JSgdd1G4ooByahDcSVIGVw6fzmdKXokFyRTqZwND/ReTsflVwtJFrXwGiqZz/mUYOTQRpLmi3qqTUfScq7FautMFVyjlTJYtPLdgQushtVCkkUtvIZKNqCNj6RbJK2RtDzj2HaSFkh6Jfx7aMbXLpK0Mqyc/YWM4/uGVbRXSvq5JIXHGyT9Ojz+tKQxGdecFv4br0g6rT9eT3NTA3NmTiq5GrIrn0ENceoTMRr64YfUkbQtvip1X9VCkkUtvIZKNtDDbrcC1wG3Zxy7EHjEzK6UdGH4+QWSxgMnAhOA4cCfJO1uZkngBmAW8BTwe4IK2w8B3wTWmdlYSScCVwEnSNoOuBRoIUh4ek7SfDNbt7kvyIBEXNQj2jpTeD+ocnz3iD3Zf9dmBtXHOeq6J8gcT6uLC2G0lzhiGo/Jh1n6qBaSLGrhNVSqAe35mNljwHtZh48Bbgs/vg34csbxu82szcxeBVYC+0naGdjGzJ60YNvV27OuSd9rLjAj7BV9AVhgZu+FDc4C+mFLiPQEZFunsdEbnooSlzh235FMCre7zn5i/fFxk3jyokM459DdaUgEx+uK2HdpQ1uQrOD6ppJSe/uqFl5DJYoi4WCYmb0JYGZvStoxPD6CoGeT1hoe6wg/zj6evuaN8F6dkj4AmjOP57imG0mzCHpVjB5duGpxrglIN/AaE6I9RcH1U0kzVqz+gIN2D/475XtinT0jyHJrXbeRQfVxjvj5472myF/+4IscPnEn/+XjXD+qpISDXIP0VuB4X6/pftDsRjNrMbOWHXbYoWCAuSYgE5X0HaxRGzuNRKz3OZwPN3Z0+zzfE2v6+Nhhg/nxcZNoSMTYuj5OfVycst9otq7zrQCcG2hR9HzelrRz2OvZGVgTHm8FRmWcNxJYHR4fmeN45jWtkhLAEIJhvlbg81nXPLq5gTc3NXD8viO5/anXu44lU5AQ5Fin6PpRWxGlw7cJF5KmFbOFQXYPCeC+v7Z2O8cznJzrf1E8t88H0tlnpwHzMo6fGGaw7QKMA54Jh+g+knRAOJ9zatY16XvNBBaG80IPA4dJGhpm0x0WHtssa9e38evF3X8xGd7wVIJELNjiIq2UjbMye0ie4eRceQxoz0fSXQQ9kO0ltRJkoF0J3CPpm8DrwHEAZrZC0j3Ai0AncGaY6QZwBkHmXCNBlttD4fGbgf+RtJKgx3NieK/3JF0OPBue930zy058KFnruo3I06wrQlwCjLpEDDNjzsxJXQ3E5q5M9wwn5wbegDY+ZnZSni/NyHP+FcAVOY4vBibmOL6JsPHK8bVbgFuKDrYII4c20tbhyQaVIBEXvzy1hSGN9T0aiP5Ymb4llcJxLgpeXqdEcd8uoWJMGD4kZwPhK9Odq3yeq1WC1nUbaSxmcYjrF+mks+yRznhMzJmZfx6mL/M2tVQ237lq4D2fEhTaA8b1nYB4jK46a4kYXHbMRCYOH8Kg+jgb2pN0dCZZvvpDtm9qYOpuzb0OiZUyb+Nl850rP298StDc1MDxLSO5/clPUq3jIm9VZFecpoYE158yJfxMTBi+Tc7GomWX5oL3yU6tLmbexsvmOxcNb3xKsHZ9G/dkpVp7w7P5NnUmGT6kkQ3tyT5nl2X3Xr531HgmDh/S6/0qoWx+MeuRnKs13viUwFe5D4xkyjjy2sepj8f7NOyVq/dy8W+XM6g+TtKs4P2iTk7wIT+3pfKEgxIMqo+zyVOti1bsrgYpg7ZO46O2TjZ1pErexiDde8m2oT3Z6/2iXFSa2Wj29bU7V62851OCDe1JGuKizcfaivKVKSOY+3z+ygL5lDrs1VsiSG/3i2pRaSUM+TkXFe/5lGDk0EaUVeDSCx7kdurU0Zz+ud2KOje7h1TqsFdm72VQQ89U+GLuF0XZ/KiH/JyLkjc+JUgXFs30tamjOXv62IgiqjwxwXUnTeH7x3yascMGc+rUwttUHN8ygp+cMHmzh72OnjyCRRdM51ffOoArvjyxKmqzeR05tyVTUIfTAbS0tNjixYvzfn3t+jYO+OEj3fZ/qYuLnxw/ibPuWlKOECteQyLG/144HaBrGGvdhnZuWfQq9z2/ivp4jLbOJMftO4qvTxvD2GGDgf7P+KqmDLJqitW5XCQ9Z2YtpVzjcz4lWLH6gx4bj3UkjZfe/DCiiAZeDEBBUkBv6uJB5YEnVr7bI4PrB/+6N+cctkfeX7L9XUutmmqzVVOszvUXb3xKknuGp5b7jipiEW1jIsYFR+zJlyYNB2DaVQvzLtocqF+y3ntwrrp441OCCcO3IZFRBgaCUjDjdx4SXVADrJjEPhN8adJwmpsaWPrG+2XP4PK1Ms5VH084KEFzUwPXHD+ZhoTYqi5GXVxcdsxE9txp8BaZ9ZaI0WOSvNwZXL5Wxrnq5D2fEh09eQQfberkst+toD4e49J5y5FEfSJW1FbP1SwRg6tn7k1nCsY0b01dIt5V+HPt+rZuO4Gen9UTGahej6+Vca46eeNTorXr27j8wRdpTxrtyfRGq0YtzvzUxUUs2DCUtqSRiMe48LfL+dGxe9OyS3Pe4a5yLtr0tTLOVScfditRvlIuteik/Ubx4OwDsXDv8E0dqa5hrZVvf1RwuKtcizZ9rYxz1cl7PiWqhj196mLQlxJ0YSenyz2LWzlkr2E0xGO0d3Yf1lrSS2JBObPPoiqP45zruy3jEb4fZT9pJ2LB8NTghsQnw1QRy9fwNNbFqI+LeFaQ8Zj44Vcm0tTQ/Vkk6OEp57DW5FHb5h3umrdkFdOuWshXb3qaaVctZP6S0uu7lSqK8jjOub7znk8fZD9pQ/fV/F+89oluPYXexIDPfGooz7y2rt9njrauj9ORTJFKGUJAisPH78Sf/raGeEwkU8acmXszbez2XPbAi92u7UilmDB8m5wJBGOHDc55HPDN2ZxzvfLGp4+yF0xmfjz74LFc9+eVmBntvSyU+bepn2L2jHE0NzXw2MtrOP2O5/m4PVnwmmI1JMSPjt2bc+5dQofBxx3BfR9c/hb1cXHG58Zy8v6ju2LPl6WWb1gr1/Eo1vk456qPNz796M6nXuOy362gLh4DjNnTx5FMpfjpIyvzXvOrZ99g9oxxAEwYPoRUEbX2bj51X976sI2L719e8LxLvjSBUdttTX08TltnZ7evtSeN6x9dycn7f1L4s9DcSb7qBNnHPfvMOVcMn/PpJ3c+9RoX37+c9qSxoT1JW2fwy32HwVuRKPBdroura4fUzPmkreqCi7KnkE6dOpoZ43di4oghNOXYPiBtUH28axvpTZ25e1LpHkmmzZ078ewz51wxvOfTD9aub+sxXwJBy37ZAy9SaPonmbJuvYLM3kd6AWdHZ5J/rv2YyaO27aoCPXJoI50Fqn12ZvQ2UnnOa08OTI/Es8+cc73xxqePMlOJW9dtpD4u2ruPbNGeTLFVXbzb8YZEjKRZ8HfKCvYKhg6qZ+yw4GstuzT3+PqZnw/mlurjMTZ1JulMWlfCQspg0cp3+VTzIBoS8a75nkxnHTx2wBqG7OE4L/zpnMvkjU8fZK/s/95R43P2Qs7/wp5c86eXux2T4A+zD2RDezLnL+JiimRmngPGrIN25YiJO3HktU90lfjpSBrn37eMB876LEnr2fWqj6vbfM9A8sKfzrlsPudTolyFLC9/4EW+d+T4rm2c6xMxrvjKRGZ9brec8x9jhw3OOa9STJHM7HPSc0urP9hIfbz7j7MuFmNDe5I5Myd1m3eqi4urj5tUlh6IF/50zuXiPZ8S5StkOXHEEBZdML2odORS752ZppzvnHyLQUcObWTSqG2ZNnZ7Vqz+EDAmDB9StqEvL/zpnMvFG58SFUolLjYduS/37u2cfItB0/9uc1MDB+2+Q9Gvs7946rVzLhcfdivRQKYSF3PvQuccPXkEiy6Yzh3f2p9FF0yviHkVT712zuUiK2JRYzWTdDjwMyAO3GRmV+Y7t6WlxRYvXlzUfQcye6uYe1db9li1xeucK56k58yspZRranrYTVIcuB44FGgFnpU038x6LsopUbFDaQN174H89wdCtcXrnBtYtT7sth+w0sz+YWbtwN3AMRHH5JxzW7ya7vkAI4A3Mj5vBfbPPEHSLGBW+GmbpMIF0yrD9sC7UQdRBI+zf3mc/acaYoTqiXOPUi+o9cYn1+463Sa5zOxG4EYASYtLHbeMgsfZvzzO/lUNcVZDjFBdcZZ6Ta0Pu7UCozI+HwmsjigW55xzoVpvfJ4FxknaRVI9cCIwP+KYnHNui1fTw25m1inpLOBhglTrW8xsRYFLbixPZJvN4+xfHmf/qoY4qyFGqOE4a36dj3POucpT68NuzjnnKpA3Ps4558rOG5+QpMMl/V3SSkkXRh1PLpJGSfqzpJckrZB0dtQx5SMpLumvkh6IOpZ8JG0raa6kv4Xf06lRx5SLpP8If97LJd0laauoYwKQdIukNZlr4yRtJ2mBpFfCv4dGGWMYU64454Q/92WSfitp2yhjDGPqEWfG186VZJK2jyK2rFhyxilpdvg7dIWkH/V2H2986FaG5whgPHCSpPHRRpVTJ3COme0FHACcWaFxApwNvBR1EL34GfAHM9sTmEQFxitpBPBtoMXMJhIkzpwYbVRdbgUOzzp2IfCImY0DHgk/j9qt9IxzATDRzPYGXgYuKndQOdxKzziRNIqgRNjr5Q4oj1vJilPSwQTVY/Y2swnA1b3dxBufQFWU4TGzN83s+fDjjwh+WUZfujqLpJHAkcBNUceSj6RtgIOAmwHMrN3M3o82qrwSQKOkBLA1FbJWzcweA97LOnwMcFv48W3Al8saVA654jSzP5pZeoP7pwjWAEYqz/cT4CfA+WQtkI9KnjjPAK40s7bwnDW93ccbn0CuMjwV90s9k6QxwBTg6WgjyemnBG+Wnvt3V45dgXeA/w6HB2+SNCjqoLKZ2SqCp8jXgTeBD8zsj9FGVdAwM3sTgoclYMeI4ynGN4CHog4iF0lHA6vMbGnUsfRid+BASU9L+oukz/R2gTc+gV7L8FQSSU3AfcB3zOzDqOPJJOkoYI2ZPRd1LL1IAPsAN5jZFGADlTFE1E04Z3IMsAswHBgk6avRRlU7JF1MMJx9Z9SxZJO0NXAxcEnUsRQhAQwlmA44D7hHUq7fq1288QlUTRkeSXUEDc+dZvabqOPJYRpwtKR/EgxfTpd0R7Qh5dQKtJpZuuc4l6AxqjSHAK+a2Ttm1gH8BviXiGMq5G1JOwOEf/c6/BIVSacBRwGnWGUueNyN4KFjafh+Ggk8L2mnSKPKrRX4jQWeIRj1KJgc4Y1PoCrK8IRPEjcDL5nZNVHHk4uZXWRmI81sDMH3caGZVdyTupm9BbwhKV2Ndwaw2fs8DYDXgQMkbR3+/GdQgYkRGeYDp4UfnwbMizCWvMJNJi8Ajjazj6OOJxcze8HMdjSzMeH7qRXYJ/y/W2nuB6YDSNodqKeXatze+BCU4QHSZXheAu7ppQxPVKYBXyPoTSwJ/3wx6qCq2GzgTknLgMnADyKOp4ewZzYXeB54geA9WxElVyTdBTwJ7CGpVdI3gSuBQyW9QpChlXfn4HLJE+d1wGBgQfg++kWkQZI3zoqTJ85bgF3D9Ou7gdN66016eR3nnHNl5z0f55xzZeeNj3POubLzxsc551zZeePjnHOu7Lzxcc45V3be+DjnnCs7b3ycGyCSHpXUEn78+/4s2y/pdEmn9tf9nCu3RNQBOLclMLN+XQxsZpEvinRuc3jPx7kMksaEm4zdFG7edqekQyQtCjdI20/SoHBDrWfDitjHhNc2Sro73KDs10Bjxn3/md4ITNL9kp4LN92alXHOeklXSFoq6SlJwwrE+Z+Szg0/flTSVZKekfSypAPD43FJV0t6IYxpdnh8Rhj3C+HraMiI8QeSnpS0WNI+kh6W9H+STs/4t88LX/sySZf16w/AbTG88XGup7EEG83tDewJnAx8FjgX+C5BpeGFZvYZ4GBgTrgdwxnAx+EGZVcA++a5/zfMbF+gBfi2pObw+CDgKTObBDwG/HsJMSfMbD/gO8Cl4bFZBIUpp4Qx3algF9RbgRPM7NMEox9nZNznDTObCjwenjeToFLx9wEkHQaMI9gDazKwr6SDSojTOcAbH+dyeTUs6pgCVhDszGkEtdXGAIcBF0paAjwKbAWMJtic7g4AM1sGLMtz/29LWkqwidkogl/mAO1Aetvx58J/q1jpCueZ1x0C/CK9aZqZvQfsEb6+l8NzbgvjTksX1H0BeNrMPjKzd4BN4ZzVYeGfvxLUm9szI37niuZzPs711JbxcSrj8xTBeyYJHGtmf8+8KNy+pHAxRenzBI3CVDP7WNKjBI0XQEdGMcYkpb0/0zFmXqcc8RTcY4XurzX7+5AIr/+hmf1XCbE514P3fJwr3cPA7PRmWZKmhMcfA04Jj00kGLbLNgRYFzY8exIMaQ2UPwKnK9h+G0nbAX8DxkgaG57zNeB+r9/6AAAAr0lEQVQvJdzzYeAbCjY0RNIISdWwW6mrMN74OFe6y4E6YFlYQv7y8PgNQFO4RcP5wDM5rv0DkAjPuZxg6G2g3ESwH9CycJjvZDPbBHwduFfSCwQ9mqIz58ItvH8FPBleP5dgawLnSuJbKjjnnCs77/k455wrO084cK6CSboYOC7r8L1mdkUU8TjXX3zYzTnnXNn5sJtzzrmy88bHOedc2Xnj45xzruy88XHOOVd2/w/snnvGpldP0QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from pandas.plotting import scatter_matrix\n",
    "import matplotlib.pyplot as plt\n",
    "#第二个正相关\n",
    "#收入和房价\n",
    "\n",
    "housing.plot(kind='scatter',x='median_income',y='median_house_value')\n",
    "#设置y上限\n",
    "plt.axis([0,16,0,550000])\n",
    "#出现的短线是异常值，需要删除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'household_per_populations'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m   2645\u001b[0m             \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2646\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2647\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: 'household_per_populations'",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-17-bd3a75d557a6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mhousing\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkind\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'scatter'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'household_per_populations'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'median_house_value'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[1;31m#设置y上限\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m0.6\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m550000\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;31m#出现的短线是异常值，需要删除\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\plotting\\_core.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    790\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mkind\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_dataframe_kinds\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    791\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mABCDataFrame\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 792\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mplot_backend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mkind\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    793\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    794\u001b[0m                 \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"plot kind {kind} can only be used for data frames\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\plotting\\_matplotlib\\__init__.py\u001b[0m in \u001b[0;36mplot\u001b[1;34m(data, kind, **kwargs)\u001b[0m\n\u001b[0;32m     59\u001b[0m             \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"ax\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"left_ax\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0max\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     60\u001b[0m     \u001b[0mplot_obj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mPLOT_CLASSES\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkind\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 61\u001b[1;33m     \u001b[0mplot_obj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgenerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     62\u001b[0m     \u001b[0mplot_obj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdraw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     63\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mplot_obj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\plotting\\_matplotlib\\core.py\u001b[0m in \u001b[0;36mgenerate\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    261\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_compute_plot_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    262\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_setup_subplots\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 263\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_plot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    264\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_add_table\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    265\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_legend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\plotting\\_matplotlib\\core.py\u001b[0m in \u001b[0;36m_make_plot\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    968\u001b[0m             \u001b[0mlabel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    969\u001b[0m         scatter = ax.scatter(\n\u001b[1;32m--> 970\u001b[1;33m             \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    971\u001b[0m             \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    972\u001b[0m             \u001b[0mc\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mc_values\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   2798\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2799\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2800\u001b[1;33m             \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2801\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2802\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\users\\administrator.desktop-0hpsd3a\\appdata\\local\\programs\\python\\python37-32\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_loc\u001b[1;34m(self, key, method, tolerance)\u001b[0m\n\u001b[0;32m   2646\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2647\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2648\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_cast_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2649\u001b[0m         \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtolerance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtolerance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2650\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: 'household_per_populations'"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAANQklEQVR4nO3cX2id933H8fdndg3rnzWhUUtnp9QbTlNfNCNR0zDWLV3ZamcXptCLpKVhoWDCmtLLhMHai9ysF4NSktSYYEJv6os1tO5IGwajzSBLFxlSJ05I0VwWay7EaUsHKSw4+e7inE1Cka3H5xxJjr7vFwj0nOcn6asf8tuPj3WeVBWSpO3vd7Z6AEnS5jD4ktSEwZekJgy+JDVh8CWpCYMvSU2sG/wkx5K8nOS5i5xPkm8kWUxyKsmNsx9TkjStIVf4jwAHLnH+ILBv/HYY+Ob0Y0mSZm3d4FfVE8CvLrHkEPCtGnkKuCrJ+2c1oCRpNnbO4HPsBs6uOF4aP/aL1QuTHGb0rwDe8Y533HT99dfP4MtLUh8nT558parmJvnYWQQ/azy25v0aquoocBRgfn6+FhYWZvDlJamPJP856cfO4rd0loBrVxzvAc7N4PNKkmZoFsE/Adw5/m2dW4DfVNWbns6RJG2tdZ/SSfJt4FbgmiRLwFeBtwFU1RHgMeA2YBH4LXDXRg0rSZrcusGvqjvWOV/AF2c2kSRpQ/hKW0lqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpoYFPwkB5K8mGQxyX1rnH93ku8n+WmS00numv2okqRprBv8JDuAB4GDwH7gjiT7Vy37IvB8Vd0A3Ar8Q5JdM55VkjSFIVf4NwOLVXWmql4DjgOHVq0p4F1JArwT+BVwYaaTSpKmMiT4u4GzK46Xxo+t9ADwYeAc8Czw5ap6Y/UnSnI4yUKShfPnz084siRpEkOCnzUeq1XHnwKeAX4f+CPggSS/96YPqjpaVfNVNT83N3fZw0qSJjck+EvAtSuO9zC6kl/pLuDRGlkEfg5cP5sRJUmzMCT4TwP7kuwd/0fs7cCJVWteAj4JkOR9wIeAM7McVJI0nZ3rLaiqC0nuAR4HdgDHqup0krvH548A9wOPJHmW0VNA91bVKxs4tyTpMq0bfICqegx4bNVjR1a8fw74y9mOJkmaJV9pK0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqYlDwkxxI8mKSxST3XWTNrUmeSXI6yY9nO6YkaVo711uQZAfwIPAXwBLwdJITVfX8ijVXAQ8BB6rqpSTv3aiBJUmTGXKFfzOwWFVnquo14DhwaNWazwKPVtVLAFX18mzHlCRNa0jwdwNnVxwvjR9b6Trg6iQ/SnIyyZ1rfaIkh5MsJFk4f/78ZBNLkiYyJPhZ47FadbwTuAn4K+BTwN8lue5NH1R1tKrmq2p+bm7usoeVJE1u3efwGV3RX7vieA9wbo01r1TVq8CrSZ4AbgB+NpMpJUlTG3KF/zSwL8neJLuA24ETq9Z8D/h4kp1J3g58DHhhtqNKkqax7hV+VV1Icg/wOLADOFZVp5PcPT5/pKpeSPJD4BTwBvBwVT23kYNLki5PqlY/Hb855ufna2FhYUu+tiS9VSU5WVXzk3ysr7SVpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpiUHBT3IgyYtJFpPcd4l1H03yepLPzG5ESdIsrBv8JDuAB4GDwH7gjiT7L7Lua8Djsx5SkjS9IVf4NwOLVXWmql4DjgOH1lj3JeA7wMsznE+SNCNDgr8bOLvieGn82P9Lshv4NHDkUp8oyeEkC0kWzp8/f7mzSpKmMCT4WeOxWnX8deDeqnr9Up+oqo5W1XxVzc/NzQ2dUZI0AzsHrFkCrl1xvAc4t2rNPHA8CcA1wG1JLlTVd2cypSRpakOC/zSwL8le4L+A24HPrlxQVXv/7/0kjwD/ZOwl6cqybvCr6kKSexj99s0O4FhVnU5y9/j8JZ+3lyRdGYZc4VNVjwGPrXpszdBX1V9PP5YkadZ8pa0kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqYlBwU9yIMmLSRaT3LfG+c8lOTV+ezLJDbMfVZI0jXWDn2QH8CBwENgP3JFk/6plPwf+rKo+AtwPHJ31oJKk6Qy5wr8ZWKyqM1X1GnAcOLRyQVU9WVW/Hh8+BeyZ7ZiSpGkNCf5u4OyK46XxYxfzBeAHa51IcjjJQpKF8+fPD59SkjS1IcHPGo/VmguTTzAK/r1rna+qo1U1X1Xzc3Nzw6eUJE1t54A1S8C1K473AOdWL0ryEeBh4GBV/XI240mSZmXIFf7TwL4ke5PsAm4HTqxckOQDwKPA56vqZ7MfU5I0rXWv8KvqQpJ7gMeBHcCxqjqd5O7x+SPAV4D3AA8lAbhQVfMbN7Yk6XKlas2n4zfc/Px8LSwsbMnXlqS3qiQnJ72g9pW2ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNWHwJakJgy9JTRh8SWrC4EtSEwZfkpow+JLUhMGXpCYMviQ1YfAlqQmDL0lNGHxJasLgS1ITBl+SmjD4ktSEwZekJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5KaMPiS1ITBl6QmDL4kNTEo+EkOJHkxyWKS+9Y4nyTfGJ8/leTG2Y8qSZrGusFPsgN4EDgI7AfuSLJ/1bKDwL7x22HgmzOeU5I0pSFX+DcDi1V1pqpeA44Dh1atOQR8q0aeAq5K8v4ZzypJmsLOAWt2A2dXHC8BHxuwZjfwi5WLkhxm9C8AgP9J8txlTbt9XQO8stVDXCHci2XuxTL3YtmHJv3AIcHPGo/VBGuoqqPAUYAkC1U1P+Drb3vuxTL3Ypl7scy9WJZkYdKPHfKUzhJw7YrjPcC5CdZIkrbQkOA/DexLsjfJLuB24MSqNSeAO8e/rXML8Juq+sXqTyRJ2jrrPqVTVReS3AM8DuwAjlXV6SR3j88fAR4DbgMWgd8Cdw342kcnnnr7cS+WuRfL3Itl7sWyifciVW96ql2StA35SltJasLgS1ITGx58b8uwbMBefG68B6eSPJnkhq2YczOstxcr1n00yetJPrOZ822mIXuR5NYkzyQ5neTHmz3jZhnwZ+TdSb6f5KfjvRjy/4VvOUmOJXn5Yq9VmribVbVhb4z+k/c/gD8AdgE/BfavWnMb8ANGv8t/C/CTjZxpq94G7sUfA1eP3z/YeS9WrPsXRr8U8JmtnnsLfy6uAp4HPjA+fu9Wz72Fe/G3wNfG788BvwJ2bfXsG7AXfwrcCDx3kfMTdXOjr/C9LcOydfeiqp6sql+PD59i9HqG7WjIzwXAl4DvAC9v5nCbbMhefBZ4tKpeAqiq7bofQ/aigHclCfBORsG/sLljbryqeoLR93YxE3Vzo4N/sVsuXO6a7eByv88vMPobfDtady+S7AY+DRzZxLm2wpCfi+uAq5P8KMnJJHdu2nSba8hePAB8mNELO58FvlxVb2zOeFeUibo55NYK05jZbRm2gcHfZ5JPMAr+n2zoRFtnyF58Hbi3ql4fXcxtW0P2YidwE/BJ4HeBf0vyVFX9bKOH22RD9uJTwDPAnwN/CPxzkn+tqv/e6OGuMBN1c6OD720Zlg36PpN8BHgYOFhVv9yk2TbbkL2YB46PY38NcFuSC1X13c0ZcdMM/TPySlW9Crya5AngBmC7BX/IXtwF/H2NnsheTPJz4Hrg3zdnxCvGRN3c6Kd0vC3DsnX3IskHgEeBz2/Dq7eV1t2LqtpbVR+sqg8C/wj8zTaMPQz7M/I94ONJdiZ5O6O71b6wyXNuhiF78RKjf+mQ5H2M7hx5ZlOnvDJM1M0NvcKvjbstw1vOwL34CvAe4KHxle2F2oZ3CBy4Fy0M2YuqeiHJD4FTwBvAw1W17W4tPvDn4n7gkSTPMnpa496q2na3TU7ybeBW4JokS8BXgbfBdN301gqS1ISvtJWkJgy+JDVh8CWpCYMvSU0YfElqwuBLUhMGX5Ka+F/Xe3Wlc9XddQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "housing.plot(kind='scatter',x='household_per_populations',y='median_house_value')\n",
    "#设置y上限\n",
    "plt.axis([0,0.6,0,550000])\n",
    "#出现的短线是异常值，需要删除"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 机器学习算法的数据准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 16512 entries, 17606 to 15775\n",
      "Data columns (total 10 columns):\n",
      " #   Column              Non-Null Count  Dtype   \n",
      "---  ------              --------------  -----   \n",
      " 0   longitude           16512 non-null  float64 \n",
      " 1   latitude            16512 non-null  float64 \n",
      " 2   housing_median_age  16512 non-null  float64 \n",
      " 3   total_rooms         16512 non-null  float64 \n",
      " 4   total_bedrooms      16354 non-null  float64 \n",
      " 5   population          16512 non-null  float64 \n",
      " 6   households          16512 non-null  float64 \n",
      " 7   median_income       16512 non-null  float64 \n",
      " 8   ocean_proximity     16512 non-null  object  \n",
      " 9   income_cat          16512 non-null  category\n",
      "dtypes: category(1), float64(8), object(1)\n",
      "memory usage: 1.2+ MB\n"
     ]
    }
   ],
   "source": [
    "#分离标签和数据集\n",
    "housing_labels=strat_train_set['median_house_value'].copy()\n",
    "housing=strat_train_set.drop(['median_house_value'],axis=1)\n",
    "#插看空值\n",
    "housing.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SimpleImputer(add_indicator=False, copy=True, fill_value=None,\n",
       "              missing_values=nan, strategy='median', verbose=0)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#数据清洗\n",
    "rows=housing[housing.isnull().any(axis=1)]\n",
    "rows.head()\n",
    "from sklearn.impute import SimpleImputer \n",
    "imputer = SimpleImputer(strategy = 'median')\n",
    "\n",
    "# 使用fit() 方法将 imputer实例适配到训练集\n",
    "housing_num=housing.drop('ocean_proximity',axis=1)\n",
    "imputer.fit(housing_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = imputer.transform(housing_num)\n",
    "housing_tr = pd.DataFrame(X, columns = housing_num.columns, index = housing_num.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<16512x5 sparse matrix of type '<class 'numpy.intc'>'\n",
       "\twith 16512 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#处理文本和分类属性\n",
    "housing_cat = housing[['ocean_proximity']]\n",
    "# 将五种值的文本转化成对应的数字分类 ， 使用转换器\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "#把独热编码压缩成稀疏矩阵\n",
    "#sparse_output指的是转为稀疏矩阵，而不是独热编码离散形式\n",
    "encoder = LabelBinarizer(sparse_output = True)\n",
    "housing_cat_1hot = encoder.fit_transform(housing_cat)\n",
    "housing_cat_1hot"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 自定义转换器\n",
    "尝试在数据处理流水线中，添加一个转换器，从而选出最重要的属性 10分 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.base import BaseEstimator, TransformerMixin\n",
    "#获取需要查找的四个值的列名，以元组形式输出\n",
    "rooms_ix, bedrooms_ix, population_ix, household_ix,median_income_ix,housing_median_age_ix =[list(housing.columns).index(col) for col in (\"total_rooms\", \"total_bedrooms\", \"population\", \"households\",\"median_income\",\"housing_median_age\")]\n",
    "\n",
    "class CombinedAttributesAdder(BaseEstimator, TransformerMixin):\n",
    "    def __init__(self, add_bedrooms_per_room = True):\n",
    "        self.add_bedrooms_per_room = add_bedrooms_per_room\n",
    "    #训练数据，y值可选\n",
    "    #fit是数据建模\n",
    "    def fit(self, X, y = None):\n",
    "        return self\n",
    "    #主要实现方法\n",
    "    def transform(self, X, y = None):\n",
    "        households_per_population  = X[:, rooms_ix] / X[:, population_ix]\n",
    "        median_income_per_housing_median_age = X[:, median_income_ix] / X[:, housing_median_age_ix]\n",
    "        if self.add_bedrooms_per_room:\n",
    "            bedrooms_per_room = X[:, bedrooms_ix] / X[:, rooms_ix]\n",
    "            return np.c_[X, households_per_population,median_income_per_housing_median_age, bedrooms_per_room]\n",
    "        else:\n",
    "            return np.c_[X, households_per_population,median_income_per_housing_median_age]\n",
    "\n",
    "        \n",
    "\n",
    "attr_adder = CombinedAttributesAdder(add_bedrooms_per_room = False)\n",
    "housing_extra_attribs = attr_adder.transform(housing.values)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-121.89</td>\n",
       "      <td>37.29</td>\n",
       "      <td>38</td>\n",
       "      <td>1568</td>\n",
       "      <td>351</td>\n",
       "      <td>710</td>\n",
       "      <td>339</td>\n",
       "      <td>2.7042</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>2.20845</td>\n",
       "      <td>0.0711632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-121.93</td>\n",
       "      <td>37.05</td>\n",
       "      <td>14</td>\n",
       "      <td>679</td>\n",
       "      <td>108</td>\n",
       "      <td>306</td>\n",
       "      <td>113</td>\n",
       "      <td>6.4214</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>5</td>\n",
       "      <td>2.21895</td>\n",
       "      <td>0.458671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-117.2</td>\n",
       "      <td>32.77</td>\n",
       "      <td>31</td>\n",
       "      <td>1952</td>\n",
       "      <td>471</td>\n",
       "      <td>936</td>\n",
       "      <td>462</td>\n",
       "      <td>2.8621</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>2.08547</td>\n",
       "      <td>0.0923258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-119.61</td>\n",
       "      <td>36.31</td>\n",
       "      <td>25</td>\n",
       "      <td>1847</td>\n",
       "      <td>371</td>\n",
       "      <td>1460</td>\n",
       "      <td>353</td>\n",
       "      <td>1.8839</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>1.26507</td>\n",
       "      <td>0.075356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-118.59</td>\n",
       "      <td>34.23</td>\n",
       "      <td>17</td>\n",
       "      <td>6592</td>\n",
       "      <td>1525</td>\n",
       "      <td>4459</td>\n",
       "      <td>1463</td>\n",
       "      <td>3.0347</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.47836</td>\n",
       "      <td>0.178512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16507</th>\n",
       "      <td>-118.13</td>\n",
       "      <td>34.2</td>\n",
       "      <td>46</td>\n",
       "      <td>1271</td>\n",
       "      <td>236</td>\n",
       "      <td>573</td>\n",
       "      <td>210</td>\n",
       "      <td>4.9312</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>4</td>\n",
       "      <td>2.21815</td>\n",
       "      <td>0.1072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16508</th>\n",
       "      <td>-117.56</td>\n",
       "      <td>33.88</td>\n",
       "      <td>40</td>\n",
       "      <td>1196</td>\n",
       "      <td>294</td>\n",
       "      <td>1052</td>\n",
       "      <td>258</td>\n",
       "      <td>2.0682</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>1.13688</td>\n",
       "      <td>0.051705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16509</th>\n",
       "      <td>-116.4</td>\n",
       "      <td>34.09</td>\n",
       "      <td>9</td>\n",
       "      <td>4855</td>\n",
       "      <td>872</td>\n",
       "      <td>2098</td>\n",
       "      <td>765</td>\n",
       "      <td>3.2723</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>3</td>\n",
       "      <td>2.31411</td>\n",
       "      <td>0.363589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16510</th>\n",
       "      <td>-118.01</td>\n",
       "      <td>33.82</td>\n",
       "      <td>31</td>\n",
       "      <td>1960</td>\n",
       "      <td>380</td>\n",
       "      <td>1356</td>\n",
       "      <td>356</td>\n",
       "      <td>4.0625</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.44543</td>\n",
       "      <td>0.131048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16511</th>\n",
       "      <td>-122.45</td>\n",
       "      <td>37.77</td>\n",
       "      <td>52</td>\n",
       "      <td>3095</td>\n",
       "      <td>682</td>\n",
       "      <td>1269</td>\n",
       "      <td>639</td>\n",
       "      <td>3.575</td>\n",
       "      <td>NEAR BAY</td>\n",
       "      <td>3</td>\n",
       "      <td>2.43893</td>\n",
       "      <td>0.06875</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>16512 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            0      1   2     3     4     5     6       7           8  9  \\\n",
       "0     -121.89  37.29  38  1568   351   710   339  2.7042   <1H OCEAN  2   \n",
       "1     -121.93  37.05  14   679   108   306   113  6.4214   <1H OCEAN  5   \n",
       "2      -117.2  32.77  31  1952   471   936   462  2.8621  NEAR OCEAN  2   \n",
       "3     -119.61  36.31  25  1847   371  1460   353  1.8839      INLAND  2   \n",
       "4     -118.59  34.23  17  6592  1525  4459  1463  3.0347   <1H OCEAN  3   \n",
       "...       ...    ...  ..   ...   ...   ...   ...     ...         ... ..   \n",
       "16507 -118.13   34.2  46  1271   236   573   210  4.9312      INLAND  4   \n",
       "16508 -117.56  33.88  40  1196   294  1052   258  2.0682      INLAND  2   \n",
       "16509  -116.4  34.09   9  4855   872  2098   765  3.2723      INLAND  3   \n",
       "16510 -118.01  33.82  31  1960   380  1356   356  4.0625   <1H OCEAN  3   \n",
       "16511 -122.45  37.77  52  3095   682  1269   639   3.575    NEAR BAY  3   \n",
       "\n",
       "            10         11  \n",
       "0      2.20845  0.0711632  \n",
       "1      2.21895   0.458671  \n",
       "2      2.08547  0.0923258  \n",
       "3      1.26507   0.075356  \n",
       "4      1.47836   0.178512  \n",
       "...        ...        ...  \n",
       "16507  2.21815     0.1072  \n",
       "16508  1.13688   0.051705  \n",
       "16509  2.31411   0.363589  \n",
       "16510  1.44543   0.131048  \n",
       "16511  2.43893    0.06875  \n",
       "\n",
       "[16512 rows x 12 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_tr=pd.DataFrame(housing_extra_attribs)\n",
    "housing_tr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "housing_extra_attribs = pd.DataFrame(\n",
    "    housing_extra_attribs,\n",
    "    columns=list(housing.columns)+[\"households_per_population\", \"median_income_per_housing_median_age\"],\n",
    "    index=housing.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>ocean_proximity</th>\n",
       "      <th>income_cat</th>\n",
       "      <th>households_per_population</th>\n",
       "      <th>median_income_per_housing_median_age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17606</th>\n",
       "      <td>-121.89</td>\n",
       "      <td>37.29</td>\n",
       "      <td>38</td>\n",
       "      <td>1568</td>\n",
       "      <td>351</td>\n",
       "      <td>710</td>\n",
       "      <td>339</td>\n",
       "      <td>2.7042</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>2.20845</td>\n",
       "      <td>0.0711632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18632</th>\n",
       "      <td>-121.93</td>\n",
       "      <td>37.05</td>\n",
       "      <td>14</td>\n",
       "      <td>679</td>\n",
       "      <td>108</td>\n",
       "      <td>306</td>\n",
       "      <td>113</td>\n",
       "      <td>6.4214</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>5</td>\n",
       "      <td>2.21895</td>\n",
       "      <td>0.458671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14650</th>\n",
       "      <td>-117.2</td>\n",
       "      <td>32.77</td>\n",
       "      <td>31</td>\n",
       "      <td>1952</td>\n",
       "      <td>471</td>\n",
       "      <td>936</td>\n",
       "      <td>462</td>\n",
       "      <td>2.8621</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>2.08547</td>\n",
       "      <td>0.0923258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3230</th>\n",
       "      <td>-119.61</td>\n",
       "      <td>36.31</td>\n",
       "      <td>25</td>\n",
       "      <td>1847</td>\n",
       "      <td>371</td>\n",
       "      <td>1460</td>\n",
       "      <td>353</td>\n",
       "      <td>1.8839</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>1.26507</td>\n",
       "      <td>0.075356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3555</th>\n",
       "      <td>-118.59</td>\n",
       "      <td>34.23</td>\n",
       "      <td>17</td>\n",
       "      <td>6592</td>\n",
       "      <td>1525</td>\n",
       "      <td>4459</td>\n",
       "      <td>1463</td>\n",
       "      <td>3.0347</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.47836</td>\n",
       "      <td>0.178512</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      longitude latitude housing_median_age total_rooms total_bedrooms  \\\n",
       "17606   -121.89    37.29                 38        1568            351   \n",
       "18632   -121.93    37.05                 14         679            108   \n",
       "14650    -117.2    32.77                 31        1952            471   \n",
       "3230    -119.61    36.31                 25        1847            371   \n",
       "3555    -118.59    34.23                 17        6592           1525   \n",
       "\n",
       "      population households median_income ocean_proximity income_cat  \\\n",
       "17606        710        339        2.7042       <1H OCEAN          2   \n",
       "18632        306        113        6.4214       <1H OCEAN          5   \n",
       "14650        936        462        2.8621      NEAR OCEAN          2   \n",
       "3230        1460        353        1.8839          INLAND          2   \n",
       "3555        4459       1463        3.0347       <1H OCEAN          3   \n",
       "\n",
       "      households_per_population median_income_per_housing_median_age  \n",
       "17606                   2.20845                            0.0711632  \n",
       "18632                   2.21895                             0.458671  \n",
       "14650                   2.08547                            0.0923258  \n",
       "3230                    1.26507                             0.075356  \n",
       "3555                    1.47836                             0.178512  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_extra_attribs.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>longitude</th>\n",
       "      <th>latitude</th>\n",
       "      <th>housing_median_age</th>\n",
       "      <th>total_rooms</th>\n",
       "      <th>total_bedrooms</th>\n",
       "      <th>population</th>\n",
       "      <th>households</th>\n",
       "      <th>median_income</th>\n",
       "      <th>ocean_proximity</th>\n",
       "      <th>income_cat</th>\n",
       "      <th>households_per_population</th>\n",
       "      <th>median_income_per_housing_median_age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12347</th>\n",
       "      <td>-116.54</td>\n",
       "      <td>33.82</td>\n",
       "      <td>12</td>\n",
       "      <td>9482</td>\n",
       "      <td>2501</td>\n",
       "      <td>2725</td>\n",
       "      <td>1300</td>\n",
       "      <td>1.5595</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>3.47963</td>\n",
       "      <td>0.129958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6263</th>\n",
       "      <td>-117.96</td>\n",
       "      <td>34.04</td>\n",
       "      <td>34</td>\n",
       "      <td>1381</td>\n",
       "      <td>265</td>\n",
       "      <td>1020</td>\n",
       "      <td>268</td>\n",
       "      <td>4.025</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.35392</td>\n",
       "      <td>0.118382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12208</th>\n",
       "      <td>-117.1</td>\n",
       "      <td>33.56</td>\n",
       "      <td>6</td>\n",
       "      <td>1868</td>\n",
       "      <td>289</td>\n",
       "      <td>750</td>\n",
       "      <td>247</td>\n",
       "      <td>4.3833</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>2.49067</td>\n",
       "      <td>0.73055</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6396</th>\n",
       "      <td>-118.03</td>\n",
       "      <td>34.14</td>\n",
       "      <td>44</td>\n",
       "      <td>1446</td>\n",
       "      <td>250</td>\n",
       "      <td>721</td>\n",
       "      <td>243</td>\n",
       "      <td>4.7308</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>4</td>\n",
       "      <td>2.00555</td>\n",
       "      <td>0.107518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12601</th>\n",
       "      <td>-121.48</td>\n",
       "      <td>38.53</td>\n",
       "      <td>37</td>\n",
       "      <td>1704</td>\n",
       "      <td>361</td>\n",
       "      <td>902</td>\n",
       "      <td>356</td>\n",
       "      <td>1.9837</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>1.88914</td>\n",
       "      <td>0.0536135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13354</th>\n",
       "      <td>-117.61</td>\n",
       "      <td>34.02</td>\n",
       "      <td>15</td>\n",
       "      <td>1791</td>\n",
       "      <td>346</td>\n",
       "      <td>1219</td>\n",
       "      <td>328</td>\n",
       "      <td>3.8125</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>3</td>\n",
       "      <td>1.46924</td>\n",
       "      <td>0.254167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5749</th>\n",
       "      <td>-118.27</td>\n",
       "      <td>34.16</td>\n",
       "      <td>45</td>\n",
       "      <td>1865</td>\n",
       "      <td>360</td>\n",
       "      <td>973</td>\n",
       "      <td>349</td>\n",
       "      <td>3.6587</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.91675</td>\n",
       "      <td>0.0813044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18799</th>\n",
       "      <td>-121.89</td>\n",
       "      <td>40.97</td>\n",
       "      <td>26</td>\n",
       "      <td>1183</td>\n",
       "      <td>276</td>\n",
       "      <td>513</td>\n",
       "      <td>206</td>\n",
       "      <td>2.225</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>2.30604</td>\n",
       "      <td>0.0855769</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15022</th>\n",
       "      <td>-117</td>\n",
       "      <td>32.77</td>\n",
       "      <td>30</td>\n",
       "      <td>1802</td>\n",
       "      <td>401</td>\n",
       "      <td>776</td>\n",
       "      <td>386</td>\n",
       "      <td>2.8125</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>2.32216</td>\n",
       "      <td>0.09375</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16834</th>\n",
       "      <td>-122.55</td>\n",
       "      <td>37.59</td>\n",
       "      <td>31</td>\n",
       "      <td>1331</td>\n",
       "      <td>245</td>\n",
       "      <td>598</td>\n",
       "      <td>225</td>\n",
       "      <td>4.1827</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>2.22575</td>\n",
       "      <td>0.134926</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>-121.99</td>\n",
       "      <td>37.96</td>\n",
       "      <td>17</td>\n",
       "      <td>2756</td>\n",
       "      <td>423</td>\n",
       "      <td>1228</td>\n",
       "      <td>426</td>\n",
       "      <td>5.5872</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>4</td>\n",
       "      <td>2.2443</td>\n",
       "      <td>0.328659</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14906</th>\n",
       "      <td>-117.06</td>\n",
       "      <td>32.6</td>\n",
       "      <td>33</td>\n",
       "      <td>905</td>\n",
       "      <td>205</td>\n",
       "      <td>989</td>\n",
       "      <td>222</td>\n",
       "      <td>2.7014</td>\n",
       "      <td>NEAR OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>0.915066</td>\n",
       "      <td>0.0818606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10779</th>\n",
       "      <td>-117.91</td>\n",
       "      <td>33.65</td>\n",
       "      <td>17</td>\n",
       "      <td>1328</td>\n",
       "      <td>377</td>\n",
       "      <td>762</td>\n",
       "      <td>344</td>\n",
       "      <td>2.2222</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>1.74278</td>\n",
       "      <td>0.130718</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7934</th>\n",
       "      <td>-118.08</td>\n",
       "      <td>33.82</td>\n",
       "      <td>26</td>\n",
       "      <td>4259</td>\n",
       "      <td>588</td>\n",
       "      <td>1644</td>\n",
       "      <td>581</td>\n",
       "      <td>6.2519</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>5</td>\n",
       "      <td>2.59063</td>\n",
       "      <td>0.240458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9745</th>\n",
       "      <td>-121.7</td>\n",
       "      <td>36.67</td>\n",
       "      <td>37</td>\n",
       "      <td>641</td>\n",
       "      <td>129</td>\n",
       "      <td>458</td>\n",
       "      <td>142</td>\n",
       "      <td>3.3456</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.39956</td>\n",
       "      <td>0.0904216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18768</th>\n",
       "      <td>-122.24</td>\n",
       "      <td>40.51</td>\n",
       "      <td>23</td>\n",
       "      <td>2216</td>\n",
       "      <td>378</td>\n",
       "      <td>1006</td>\n",
       "      <td>338</td>\n",
       "      <td>4.559</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>4</td>\n",
       "      <td>2.20278</td>\n",
       "      <td>0.198217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5564</th>\n",
       "      <td>-118.29</td>\n",
       "      <td>33.91</td>\n",
       "      <td>41</td>\n",
       "      <td>2475</td>\n",
       "      <td>532</td>\n",
       "      <td>1416</td>\n",
       "      <td>470</td>\n",
       "      <td>3.8372</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.74788</td>\n",
       "      <td>0.0935902</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7064</th>\n",
       "      <td>-118.03</td>\n",
       "      <td>33.94</td>\n",
       "      <td>30</td>\n",
       "      <td>2572</td>\n",
       "      <td>521</td>\n",
       "      <td>1564</td>\n",
       "      <td>501</td>\n",
       "      <td>3.4861</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>3</td>\n",
       "      <td>1.6445</td>\n",
       "      <td>0.116203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13637</th>\n",
       "      <td>-117.32</td>\n",
       "      <td>34.08</td>\n",
       "      <td>41</td>\n",
       "      <td>1359</td>\n",
       "      <td>264</td>\n",
       "      <td>786</td>\n",
       "      <td>244</td>\n",
       "      <td>2.5208</td>\n",
       "      <td>INLAND</td>\n",
       "      <td>2</td>\n",
       "      <td>1.72901</td>\n",
       "      <td>0.0614829</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4827</th>\n",
       "      <td>-118.32</td>\n",
       "      <td>34.03</td>\n",
       "      <td>31</td>\n",
       "      <td>2206</td>\n",
       "      <td>501</td>\n",
       "      <td>1194</td>\n",
       "      <td>435</td>\n",
       "      <td>1.9531</td>\n",
       "      <td>&lt;1H OCEAN</td>\n",
       "      <td>2</td>\n",
       "      <td>1.84757</td>\n",
       "      <td>0.0630032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      longitude latitude housing_median_age total_rooms total_bedrooms  \\\n",
       "12347   -116.54    33.82                 12        9482           2501   \n",
       "6263    -117.96    34.04                 34        1381            265   \n",
       "12208    -117.1    33.56                  6        1868            289   \n",
       "6396    -118.03    34.14                 44        1446            250   \n",
       "12601   -121.48    38.53                 37        1704            361   \n",
       "13354   -117.61    34.02                 15        1791            346   \n",
       "5749    -118.27    34.16                 45        1865            360   \n",
       "18799   -121.89    40.97                 26        1183            276   \n",
       "15022      -117    32.77                 30        1802            401   \n",
       "16834   -122.55    37.59                 31        1331            245   \n",
       "1468    -121.99    37.96                 17        2756            423   \n",
       "14906   -117.06     32.6                 33         905            205   \n",
       "10779   -117.91    33.65                 17        1328            377   \n",
       "7934    -118.08    33.82                 26        4259            588   \n",
       "9745     -121.7    36.67                 37         641            129   \n",
       "18768   -122.24    40.51                 23        2216            378   \n",
       "5564    -118.29    33.91                 41        2475            532   \n",
       "7064    -118.03    33.94                 30        2572            521   \n",
       "13637   -117.32    34.08                 41        1359            264   \n",
       "4827    -118.32    34.03                 31        2206            501   \n",
       "\n",
       "      population households median_income ocean_proximity income_cat  \\\n",
       "12347       2725       1300        1.5595          INLAND          2   \n",
       "6263        1020        268         4.025       <1H OCEAN          3   \n",
       "12208        750        247        4.3833       <1H OCEAN          3   \n",
       "6396         721        243        4.7308          INLAND          4   \n",
       "12601        902        356        1.9837          INLAND          2   \n",
       "13354       1219        328        3.8125          INLAND          3   \n",
       "5749         973        349        3.6587       <1H OCEAN          3   \n",
       "18799        513        206         2.225          INLAND          2   \n",
       "15022        776        386        2.8125       <1H OCEAN          2   \n",
       "16834        598        225        4.1827      NEAR OCEAN          3   \n",
       "1468        1228        426        5.5872          INLAND          4   \n",
       "14906        989        222        2.7014      NEAR OCEAN          2   \n",
       "10779        762        344        2.2222       <1H OCEAN          2   \n",
       "7934        1644        581        6.2519       <1H OCEAN          5   \n",
       "9745         458        142        3.3456       <1H OCEAN          3   \n",
       "18768       1006        338         4.559          INLAND          4   \n",
       "5564        1416        470        3.8372       <1H OCEAN          3   \n",
       "7064        1564        501        3.4861       <1H OCEAN          3   \n",
       "13637        786        244        2.5208          INLAND          2   \n",
       "4827        1194        435        1.9531       <1H OCEAN          2   \n",
       "\n",
       "      households_per_population median_income_per_housing_median_age  \n",
       "12347                   3.47963                             0.129958  \n",
       "6263                    1.35392                             0.118382  \n",
       "12208                   2.49067                              0.73055  \n",
       "6396                    2.00555                             0.107518  \n",
       "12601                   1.88914                            0.0536135  \n",
       "13354                   1.46924                             0.254167  \n",
       "5749                    1.91675                            0.0813044  \n",
       "18799                   2.30604                            0.0855769  \n",
       "15022                   2.32216                              0.09375  \n",
       "16834                   2.22575                             0.134926  \n",
       "1468                     2.2443                             0.328659  \n",
       "14906                  0.915066                            0.0818606  \n",
       "10779                   1.74278                             0.130718  \n",
       "7934                    2.59063                             0.240458  \n",
       "9745                    1.39956                            0.0904216  \n",
       "18768                   2.20278                             0.198217  \n",
       "5564                    1.74788                            0.0935902  \n",
       "7064                     1.6445                             0.116203  \n",
       "13637                   1.72901                            0.0614829  \n",
       "4827                    1.84757                            0.0630032  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_extra_attribs.sample(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 转换流水线\n",
    "尝试创建一个覆盖完整的数据准备和最终预测的流水线 15分 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dataFrame -> series -> ndarray(np数组)\n",
    "class DataFrameSelector(BaseEstimator, TransformerMixin):\n",
    "    def __init__(self, attribute_names):\n",
    "        self.attribute_names = attribute_names\n",
    "    def fit(self, X, y=None):\n",
    "        return self\n",
    "    def transform(self, X):\n",
    "        return X[self.attribute_names].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['longitude',\n",
       " 'latitude',\n",
       " 'housing_median_age',\n",
       " 'total_rooms',\n",
       " 'total_bedrooms',\n",
       " 'population',\n",
       " 'households',\n",
       " 'median_income',\n",
       " 'income_cat']"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_attribs=list(housing_num)\n",
    "num_attribs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "num_pipeline = Pipeline([\n",
    "        ('selector', DataFrameSelector(num_attribs)),\n",
    "        ('imputer', SimpleImputer(strategy=\"median\")),\n",
    "        ('attribs_adder', CombinedAttributesAdder()),\n",
    "        ('std_scaler', StandardScaler()),\n",
    "    ])\n",
    "#和独热编码并行的，可以进行下一个pipline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.base import TransformerMixin \n",
    "class MyLabelBinarizer(TransformerMixin):\n",
    "    def __init__(self, *args, **kwargs):\n",
    "        self.encoder = LabelBinarizer(*args, **kwargs)\n",
    "    def fit(self, x, y=0):\n",
    "        #调用系统自带的\n",
    "        self.encoder.fit(x)\n",
    "        return self\n",
    "    def transform(self, x, y=0):\n",
    "        return self.encoder.transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_attribs = ['ocean_proximity']\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "\n",
    "cat_pipeline = Pipeline([\n",
    "        ('selector', DataFrameSelector(cat_attribs)),               \n",
    "        ('LabelBinarizer', MyLabelBinarizer()),\n",
    "    ])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import FeatureUnion\n",
    "\n",
    "full_pipeline = FeatureUnion(transformer_list=[\n",
    "        (\"num_pipline\", num_pipeline,),\n",
    "        ('cat_pipline', cat_pipeline),\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "housing_finished= full_pipeline.fit_transform(housing)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>11</th>\n",
       "      <th>12</th>\n",
       "      <th>13</th>\n",
       "      <th>14</th>\n",
       "      <th>15</th>\n",
       "      <th>16</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.156043</td>\n",
       "      <td>0.771950</td>\n",
       "      <td>0.743331</td>\n",
       "      <td>-0.493234</td>\n",
       "      <td>-0.445438</td>\n",
       "      <td>-0.636211</td>\n",
       "      <td>-0.420698</td>\n",
       "      <td>-0.614937</td>\n",
       "      <td>-0.954456</td>\n",
       "      <td>0.185754</td>\n",
       "      <td>-0.478016</td>\n",
       "      <td>0.155318</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-1.176025</td>\n",
       "      <td>0.659695</td>\n",
       "      <td>-1.165317</td>\n",
       "      <td>-0.908967</td>\n",
       "      <td>-1.036928</td>\n",
       "      <td>-0.998331</td>\n",
       "      <td>-1.022227</td>\n",
       "      <td>1.336459</td>\n",
       "      <td>1.890305</td>\n",
       "      <td>0.194402</td>\n",
       "      <td>0.949616</td>\n",
       "      <td>-0.836289</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.186849</td>\n",
       "      <td>-1.342183</td>\n",
       "      <td>0.186642</td>\n",
       "      <td>-0.313660</td>\n",
       "      <td>-0.153345</td>\n",
       "      <td>-0.433639</td>\n",
       "      <td>-0.093318</td>\n",
       "      <td>-0.532046</td>\n",
       "      <td>-0.954456</td>\n",
       "      <td>0.084499</td>\n",
       "      <td>-0.400050</td>\n",
       "      <td>0.422200</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.017068</td>\n",
       "      <td>0.313576</td>\n",
       "      <td>-0.290520</td>\n",
       "      <td>-0.362762</td>\n",
       "      <td>-0.396756</td>\n",
       "      <td>0.036041</td>\n",
       "      <td>-0.383436</td>\n",
       "      <td>-1.045566</td>\n",
       "      <td>-0.954456</td>\n",
       "      <td>-0.590968</td>\n",
       "      <td>-0.462569</td>\n",
       "      <td>-0.196453</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.492474</td>\n",
       "      <td>-0.659299</td>\n",
       "      <td>-0.926736</td>\n",
       "      <td>1.856193</td>\n",
       "      <td>2.412211</td>\n",
       "      <td>2.724154</td>\n",
       "      <td>2.570975</td>\n",
       "      <td>-0.441437</td>\n",
       "      <td>-0.006202</td>\n",
       "      <td>-0.415358</td>\n",
       "      <td>-0.082529</td>\n",
       "      <td>0.269928</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4         5         6  \\\n",
       "0 -1.156043  0.771950  0.743331 -0.493234 -0.445438 -0.636211 -0.420698   \n",
       "1 -1.176025  0.659695 -1.165317 -0.908967 -1.036928 -0.998331 -1.022227   \n",
       "2  1.186849 -1.342183  0.186642 -0.313660 -0.153345 -0.433639 -0.093318   \n",
       "3 -0.017068  0.313576 -0.290520 -0.362762 -0.396756  0.036041 -0.383436   \n",
       "4  0.492474 -0.659299 -0.926736  1.856193  2.412211  2.724154  2.570975   \n",
       "\n",
       "          7         8         9        10        11   12   13   14   15   16  \n",
       "0 -0.614937 -0.954456  0.185754 -0.478016  0.155318  1.0  0.0  0.0  0.0  0.0  \n",
       "1  1.336459  1.890305  0.194402  0.949616 -0.836289  1.0  0.0  0.0  0.0  0.0  \n",
       "2 -0.532046 -0.954456  0.084499 -0.400050  0.422200  0.0  0.0  0.0  0.0  1.0  \n",
       "3 -1.045566 -0.954456 -0.590968 -0.462569 -0.196453  0.0  1.0  0.0  0.0  0.0  \n",
       "4 -0.441437 -0.006202 -0.415358 -0.082529  0.269928  1.0  0.0  0.0  0.0  0.0  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_finished = pd.DataFrame(housing_finished)\n",
    "housing_finished.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17606    286600.0\n",
       "18632    340600.0\n",
       "14650    196900.0\n",
       "3230      46300.0\n",
       "3555     254500.0\n",
       "           ...   \n",
       "6563     240200.0\n",
       "12053    113000.0\n",
       "13908     97800.0\n",
       "11159    225900.0\n",
       "15775    500001.0\n",
       "Name: median_house_value, Length: 16512, dtype: float64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing_labels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 模型调参和网格搜索\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score=nan,\n",
       "             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,\n",
       "                                             criterion='mse', max_depth=None,\n",
       "                                             max_features='auto',\n",
       "                                             max_leaf_nodes=None,\n",
       "                                             max_samples=None,\n",
       "                                             min_impurity_decrease=0.0,\n",
       "                                             min_impurity_split=None,\n",
       "                                             min_samples_leaf=1,\n",
       "                                             min_samples_split=2,\n",
       "                                             min_weight_fraction_leaf=0.0,\n",
       "                                             n_estimators=100, n_jobs=None,\n",
       "                                             oob_score=False, random_state=42,\n",
       "                                             verbose=0, warm_start=False),\n",
       "             iid='deprecated', n_jobs=None,\n",
       "             param_grid=[{'max_features': [2, 4, 6, 8],\n",
       "                          'n_estimators': [3, 10, 30]},\n",
       "                         {'bootstrap': [False], 'max_features': [2, 3, 4],\n",
       "                          'n_estimators': [3, 10]}],\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
       "             scoring='neg_mean_squared_error', verbose=0)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "#随机森林\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "\n",
    "param_grid = [\n",
    "    {'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},\n",
    "    {'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},\n",
    "  ]\n",
    "#初始化随机森林算法\n",
    "forest_reg = RandomForestRegressor(random_state=42)\n",
    "#放入网格当中，使用五次交叉验证，return_train_score返回测试分数\n",
    "grid_search = GridSearchCV(forest_reg, param_grid, cv=5,\n",
    "                           scoring='neg_mean_squared_error', return_train_score=True)\n",
    "grid_search.fit(housing_finished, housing_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_features': 8, 'n_estimators': 30}"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',\n",
       "                      max_depth=None, max_features=8, max_leaf_nodes=None,\n",
       "                      max_samples=None, min_impurity_decrease=0.0,\n",
       "                      min_impurity_split=None, min_samples_leaf=1,\n",
       "                      min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
       "                      n_estimators=30, n_jobs=None, oob_score=False,\n",
       "                      random_state=42, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#获取最好的估算器\n",
    "grid_search.best_estimator_\n",
    "#把这些参数复制下来"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "63080.39381757411 {'max_features': 2, 'n_estimators': 3}\n",
      "55721.829901366626 {'max_features': 2, 'n_estimators': 10}\n",
      "53233.04661030947 {'max_features': 2, 'n_estimators': 30}\n",
      "61259.2688254063 {'max_features': 4, 'n_estimators': 3}\n",
      "54022.88296016991 {'max_features': 4, 'n_estimators': 10}\n",
      "51429.142545089184 {'max_features': 4, 'n_estimators': 30}\n",
      "59185.079466104224 {'max_features': 6, 'n_estimators': 3}\n",
      "52753.0651494794 {'max_features': 6, 'n_estimators': 10}\n",
      "50447.81024984791 {'max_features': 6, 'n_estimators': 30}\n",
      "58848.62578798979 {'max_features': 8, 'n_estimators': 3}\n",
      "52402.93237351608 {'max_features': 8, 'n_estimators': 10}\n",
      "50329.8445652517 {'max_features': 8, 'n_estimators': 30}\n",
      "62526.973407498335 {'bootstrap': False, 'max_features': 2, 'n_estimators': 3}\n",
      "54711.59623962437 {'bootstrap': False, 'max_features': 2, 'n_estimators': 10}\n",
      "61360.82632622479 {'bootstrap': False, 'max_features': 3, 'n_estimators': 3}\n",
      "53079.127805938275 {'bootstrap': False, 'max_features': 3, 'n_estimators': 10}\n",
      "58715.8062964337 {'bootstrap': False, 'max_features': 4, 'n_estimators': 3}\n",
      "52171.46699024498 {'bootstrap': False, 'max_features': 4, 'n_estimators': 10}\n"
     ]
    }
   ],
   "source": [
    "cvres = grid_search.cv_results_\n",
    "for mean_score, params in zip(cvres[\"mean_test_score\"], cvres[\"params\"]):\n",
    "    print(np.sqrt(-mean_score), params)\n",
    "#50811.43543872171 {'max_features': 6, 'n_estimators': 30} 这个效果最好"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#我这个选择的误差值最最小"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用GridSearchCV自动搜索‘bedrooms_per_room’ 合成特征是否应该合并到训练集中 15分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "#获取最佳的特征值\n",
    "feature_importances = grid_search.best_estimator_.feature_importances_\n",
    "extra_attribs=[\"households_per_population\", \"median_income_per_housing_median_age\", \"bedrooms_per_room\"]\n",
    "cat_one_hot_attribs = list(encoder.classes_)\n",
    "attributes = num_attribs + extra_attribs + cat_one_hot_attribs\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['longitude',\n",
       " 'latitude',\n",
       " 'housing_median_age',\n",
       " 'total_rooms',\n",
       " 'total_bedrooms',\n",
       " 'population',\n",
       " 'households',\n",
       " 'median_income',\n",
       " 'income_cat',\n",
       " 'households_per_population',\n",
       " 'median_income_per_housing_median_age',\n",
       " 'bedrooms_per_room',\n",
       " '<1H OCEAN',\n",
       " 'INLAND',\n",
       " 'ISLAND',\n",
       " 'NEAR BAY',\n",
       " 'NEAR OCEAN']"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "attributes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0.2863868527669854, 'median_income'),\n",
       " (0.13038006588271656, 'income_cat'),\n",
       " (0.12665451629350347, 'INLAND'),\n",
       " (0.11917694808529121, 'households_per_population'),\n",
       " (0.07210463715896932, 'longitude'),\n",
       " (0.0694773805445433, 'latitude'),\n",
       " (0.052698640178703286, 'bedrooms_per_room'),\n",
       " (0.035636907060159435, 'housing_median_age'),\n",
       " (0.028119316117977026, 'median_income_per_housing_median_age'),\n",
       " (0.017420453932609308, 'households'),\n",
       " (0.016990702831925917, 'population'),\n",
       " (0.016661414840155965, 'total_bedrooms'),\n",
       " (0.01566428032215272, 'total_rooms'),\n",
       " (0.006691140878481288, '<1H OCEAN'),\n",
       " (0.0033901427979845036, 'NEAR OCEAN'),\n",
       " (0.002509992117161337, 'NEAR BAY'),\n",
       " (3.660819067999369e-05, 'ISLAND')]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(zip(feature_importances, attributes), reverse = True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "根据上面结果显示，可以合并\n",
    "   * median_income/income_cat\n",
    "   * households_per_population\n",
    "   * bedrooms_per_room\n",
    "   * housing_median_age"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
